logo

searx

My custom branch(es) on searx, a meta-search engine
commit: 3afdd1d9941527e23cd7c05d2c15dd24a32de834
parent: 39ebe1d5193bf62340bc101d51cab77df3c06f7e
Author: asciimoo <asciimoo@gmail.com>
Date:   Sun, 19 Jan 2014 00:17:02 +0100

[enh] settings unification - new dependency: pyyaml

Diffstat:

M .gitignore                 |   2 ++
M README.md                  |   3 +--
D engines.cfg_sample         |  99 -------------------------------------------------------------------------------
M requirements.txt           |   1 +
M searx/__init__.py          |  22 ++++++++++++++++++++++
M searx/engines/__init__.py  |  23 +++++++++--------------
M searx/webapp.py            |  12 ++++++------
A settings.yml               | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M setup.py                   |   1 +
M versions.cfg               |   1 +
10 files changed, 150 insertions(+), 121 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,6 +1,8 @@ env engines.cfg .installed.cfg +.coverage +covearge/ setup.cfg *.pyc diff --git a/README.md b/README.md @@ -25,8 +25,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx` * install dependencies: `pip install -r requirements.txt` -* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!) -* rename `engines.cfg_sample` to `engines.cfg` +* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!) * run `python searx/webapp.py` to start the application For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation) diff --git a/engines.cfg_sample b/engines.cfg_sample @@ -1,99 +0,0 @@ -[wikipedia] -engine = mediawiki -url = https://en.wikipedia.org/ -number_of_results = 1 - -[bing] -engine = bing -locale = en-US - -[currency] -engine=currency_convert -categories = general - -[deviantart] -engine = deviantart -categories = images - -[ddg definitions] -engine = duckduckgo_definitions - -[duckduckgo] -engine = duckduckgo -locale = en-us - -[filecrop] -engine = filecrop -categories = files - -[flickr] -engine = flickr -categories = images - -[github] -engine = github -categories = it - -[google] -engine = json_engine -search_url = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} -categories = general -url_query = /responseData/results/unescapedUrl -content_query = /responseData/results/content -title_query = /responseData/results/titleNoFormatting - -[google images] -engine = google_images -categories = images - -[piratebay] -engine = piratebay -categories = videos, music, files - -[soundcloud] -engine = soundcloud -categories = music - -[stackoverflow] -engine = stackoverflow -categories = it - 
-[startpage] -engine = startpage - -[twitter] -engine = twitter -categories = social media - -[urbandictionary] -engine = xpath -search_url = http://www.urbandictionary.com/define.php?term={query} -url_xpath = //div[@class="word"]//a/@href -title_xpath = //div[@class="word"]//a -content_xpath = //div[@class="definition"] - -[yahoo] -engine = xpath -search_url = http://search.yahoo.com/search?p={query} -results_xpath = //div[@class="res"] -url_xpath = .//h3/a/@href -title_xpath = .//h3/a -content_xpath = .//div[@class="abstr"] -suggestion_xpath = //div[@id="satat"]//a - -[youtube] -engine = youtube -categories = videos - -[dailymotion] -engine = dailymotion -locale = en_US -categories = videos - -[vimeo] -engine = vimeo -categories = videos -results_xpath = //div[@id="browse_content"]/ol/li -url_xpath=./a/@href -title_xpath=./a/div[@class="data"]/p[@class="title"]/text() -content_xpath=./a/img/@src diff --git a/requirements.txt b/requirements.txt @@ -1,3 +1,4 @@ flask grequests lxml +pyyaml diff --git a/searx/__init__.py b/searx/__init__.py @@ -0,0 +1,22 @@ +from os import environ +from os.path import realpath, dirname, join +try: + from yaml import load +except: + from sys import exit, stderr + stderr.write('[E] install pyyaml\n') + exit(2) + + +searx_dir = realpath(dirname(realpath(__file__))+'/../') +engine_dir = dirname(realpath(__file__)) + +if 'SEARX_SETTINGS_PATH' in environ: + settings_path = environ['SEARX_SETTINGS_PATH'] +else: + settings_path = join(searx_dir, 'settings.yml') + + +with open(settings_path) as settings_yaml: + settings = load(settings_yaml) + diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py @@ -24,15 +24,11 @@ from operator import itemgetter from urlparse import urlparse from searx import settings from searx.utils import gen_useragent -import ConfigParser import sys from datetime import datetime engine_dir = dirname(realpath(__file__)) -searx_dir = join(engine_dir, '../../') -engines_config = 
ConfigParser.SafeConfigParser() -engines_config.read(join(searx_dir, 'engines.cfg')) number_of_searches = 0 engines = {} @@ -48,24 +44,23 @@ def load_module(filename): module.name = modname return module -if not engines_config.sections(): - print '[E] Error no engines found. Edit your engines.cfg' +if not 'engines' in settings or not settings['engines']: + print '[E] Error no engines found. Edit your settings.yml' exit(2) -for engine_config_name in engines_config.sections(): - engine_data = engines_config.options(engine_config_name) - engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py') - engine.name = engine_config_name +for engine_data in settings['engines']: + engine_name = engine_data['engine'] + engine = load_module(engine_name+'.py') for param_name in engine_data: if param_name == 'engine': continue if param_name == 'categories': - if engines_config.get(engine_config_name, param_name) == 'none': + if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(',')) + engine.categories = map(str.strip, engine_data['categories'].split(',')) continue - setattr(engine, param_name, engines_config.get(engine_config_name, param_name)) + setattr(engine, param_name, engine_data[param_name]) for engine_attr in dir(engine): if engine_attr.startswith('_'): continue @@ -170,7 +165,7 @@ def search(query, request, selected_engines): request_args = dict(headers = request_params['headers'] ,hooks = dict(response=callback) ,cookies = request_params['cookies'] - ,timeout = settings.request_timeout + ,timeout = settings['server']['request_timeout'] ) if request_params['method'] == 'GET': diff --git a/searx/webapp.py b/searx/webapp.py @@ -41,7 +41,7 @@ from searx.utils import highlight_content, html_to_text app = Flask(__name__) -app.secret_key = settings.secret_key +app.secret_key = settings['server']['secret_key'] opensearch_xml = '''<?xml version="1.0" 
encoding="utf-8"?> @@ -58,8 +58,8 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?> def get_base_url(): - if settings.base_url: - hostname = settings.base_url + if settings['server']['base_url']: + hostname = settings['server']['base_url'] else: scheme = 'http' if request.is_secure: @@ -243,9 +243,9 @@ def run(): from gevent import monkey monkey.patch_all() - app.run(debug = settings.debug - ,use_debugger = settings.debug - ,port = settings.port + app.run(debug = settings['server']['debug'] + ,use_debugger = settings['server']['debug'] + ,port = settings['server']['port'] ) diff --git a/settings.yml b/settings.yml @@ -0,0 +1,107 @@ +server: + port : 8888 + secret_key : "ultrasecretkey" # change this! + debug : True + request_timeout : 3.0 # seconds + base_url: False + +engines: + - name : wikipedia + engine : mediawiki + url : https://en.wikipedia.org/ + number_of_results : 1 + + - name : bing + engine : bing + locale : en-US + + - name : currency + engine : currency_convert + categories : general + + - name : deviantart + engine : deviantart + categories : images + + - name : ddg definitions + engine : duckduckgo_definitions + + - name : duckduckgo + engine : duckduckgo + locale : en-us + + - name : filecrop + engine : filecrop + categories : files + + - name : flickr + engine : flickr + categories : images + + - name : github + engine : github + categories : it + + - name : google + engine : json_engine + search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} + categories : general + url_query : /responseData/results/unescapedUrl + content_query : /responseData/results/content + title_query : /responseData/results/titleNoFormatting + + - name : google images + engine : google_images + categories : images + + - name : piratebay + engine : piratebay + categories : videos, music, files + + - name : soundcloud + engine : soundcloud + categories : music + + - name : stackoverflow + engine : 
stackoverflow + categories : it + + - name : startpage + engine : startpage + + - name : twitter + engine : twitter + categories : social media + + - name : urbandictionary + engine : xpath + search_url : http://www.urbandictionary.com/define.php?term={query} + url_xpath : //div[@class="word"]//a/@href + title_xpath : //div[@class="word"]//a + content_xpath : //div[@class="definition"] + + - name : yahoo + engine : xpath + search_url : http://search.yahoo.com/search?p={query} + results_xpath : //div[@class="res"] + url_xpath : .//h3/a/@href + title_xpath : .//h3/a + content_xpath : .//div[@class="abstr"] + suggestion_xpath : //div[@id="satat"]//a + + - name : youtube + engine : youtube + categories : videos + + - name : dailymotion + engine : dailymotion + locale : en_US + categories : videos + + - name : vimeo + engine : vimeo + categories : videos + results_xpath : //div[@id="browse_content"]/ol/li + url_xpath : ./a/@href + title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() + content_xpath : ./a/img/@src diff --git a/setup.py b/setup.py @@ -32,6 +32,7 @@ setup( 'flask', 'grequests', 'lxml', + 'pyyaml', 'setuptools', ], extras_require={ diff --git a/versions.cfg b/versions.cfg @@ -16,6 +16,7 @@ mccabe = 0.2.1 pep8 = 1.4.6 plone.testing = 4.0.8 pyflakes = 0.7.3 +pyyaml = 3.10 requests = 2.2.0 robotframework-debuglibrary = 0.3 robotframework-httplibrary = 0.4.2