commit: 8bff42f049dcac77559beaf2932a47921feb1d49
parent f30d5e87938275219852c94d57771e182bfa435b
Author: Adam Tauber <asciimoo@gmail.com>
Date: Wed, 28 Dec 2016 20:00:53 +0100
Merge branch 'master' into languages
Diffstat:
7 files changed, 104 insertions(+), 15 deletions(-)
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -59,3 +59,4 @@ generally made searx better:
- Thomas Renard @threnard
- Pydo `<https://github.com/pydo>`_
- Athemis `<https://github.com/Athemis>`_
+- Stefan Antoni `<http://stefan.antoni.io>`_
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
@@ -34,7 +34,8 @@ engine_dir = dirname(realpath(__file__))
engines = {}
categories = {'general': []}
-_initialized = False
+
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
engine_shortcuts = {}
engine_default_args = {'paging': False,
@@ -214,13 +215,7 @@ def get_engines_stats():
]
-if 'engines' not in settings or not settings['engines']:
- logger.error('No engines found. Edit your settings.yml')
- exit(2)
-
-languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
-
-for engine_data in settings['engines']:
- engine = load_engine(engine_data)
- if engine is not None:
+def initialize_engines(engine_list):
+ for engine_data in engine_list:
+ engine = load_engine(engine_data)
engines[engine.name] = engine
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
@@ -72,9 +72,9 @@ def response(resp):
'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
}
- img = result.xpath('.//img/@src')[0]
- if img and not img.startswith('data'):
- r['img_src'] = img
+ imgs = result.xpath('.//img/@src')
+ if len(imgs) and not imgs[0].startswith('data'):
+ r['img_src'] = imgs[0]
results.append(r)
diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py
@@ -0,0 +1,57 @@
+"""
+ Searx (all)
+
+ @website https://github.com/asciimoo/searx
+ @provide-api yes (https://asciimoo.github.io/searx/dev/search_api.html)
+
+ @using-api yes
+ @results JSON
+ @stable yes (using api)
+ @parse url, title, content
+"""
+
+from json import loads
+from searx.engines import categories as searx_categories
+
+
+categories = searx_categories.keys()
+
+# search-url
+instance_urls = []
+instance_index = 0
+
+
+# do search-request
+def request(query, params):
+ global instance_index
+ params['url'] = instance_urls[instance_index % len(instance_urls)]
+ params['method'] = 'POST'
+
+ instance_index += 1
+
+ params['data'] = {
+ 'q': query,
+ 'pageno': params['pageno'],
+ 'language': params['language'],
+ 'time_range': params['time_range'],
+ 'category': params['category'],
+ 'format': 'json'
+ }
+
+ return params
+
+
+# get response from search-request
+def response(resp):
+
+ response_json = loads(resp.text)
+ results = response_json['results']
+
+ for i in ('answers', 'infoboxes'):
+ results.extend(response_json[i])
+
+ results.extend({'suggestion': s} for s in response_json['suggestions'])
+
+ results.append({'number_of_results': response_json['number_of_results']})
+
+ return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -13,6 +13,7 @@ server:
secret_key : "ultrasecretkey" # change this!
base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
image_proxy : False # Proxying image results through searx
+ http_protocol_version : "1.0" # 1.0 and 1.1 are supported
ui:
themes_path : "" # Custom ui themes path - leave it blank if you didn't change
@@ -91,6 +92,17 @@ engines:
disabled : True
shortcut : bb
+ - name : ccc-tv
+ engine : xpath
+ paging : False
+ search_url : https://media.ccc.de/search/?q={query}
+ url_xpath : //div[@class="caption"]/h3/a/@href
+ title_xpath : //div[@class="caption"]/h3/a/text()
+ content_xpath : //div[@class="caption"]/h4/@title
+ categories : videos
+ disabled : True
+ shortcut : c3tv
+
- name : crossref
engine : json_engine
paging : True
@@ -154,6 +166,18 @@ engines:
shortcut : ddg
disabled : True
+ - name : etymonline
+ engine : xpath
+ paging : True
+ search_url : http://etymonline.com/?search={query}&p={pageno}
+ url_xpath : //dt/a[1]/@href
+ title_xpath : //dt
+ content_xpath : //dd
+ suggestion_xpath : //a[@class="crossreference"]
+ first_page_num : 0
+ shortcut : et
+ disabled : True
+
# api-key required: http://www.faroo.com/hp/api/api.html#key
# - name : faroo
# engine : faroo
@@ -430,6 +454,14 @@ engines:
shortcut : scc
disabled : True
+# - name : searx
+# engine : searx_engine
+# shortcut : se
+# instance_urls :
+# - http://127.0.0.1:8888/
+# - ...
+# disabled : True
+
- name : spotify
engine : spotify
shortcut : stf
diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml
@@ -13,6 +13,7 @@ server:
secret_key : "ultrasecretkey" # change this!
base_url : False
image_proxy : False
+ http_protocol_version : "1.0"
ui:
themes_path : ""
diff --git a/searx/webapp.py b/searx/webapp.py
@@ -53,7 +53,7 @@ from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify
from searx import settings, searx_dir, searx_debug
from searx.engines import (
- categories, engines, get_engines_stats, engine_shortcuts
+ categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
from searx.utils import (
UnicodeWriter, highlight_content, html_to_text, get_themes,
@@ -81,7 +81,7 @@ except ImportError:
# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
-WSGIRequestHandler.protocol_version = "HTTP/1.1"
+WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
static_path, templates_path, themes =\
get_themes(settings['ui']['themes_path']
@@ -769,6 +769,9 @@ def page_not_found(e):
def run():
+ if not searx_debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
+ initialize_engines(settings['engines'])
+
app.run(
debug=searx_debug,
use_debugger=searx_debug,