commit: 596c6b6c93a50d8d797e90ad5dd9b608599cd653
parent: 55abf07a4f80f74fbcfbeddaee4f8591216802cd
Author: Adam Tauber <asciimoo@gmail.com>
Date: Tue, 6 Sep 2016 17:01:50 +0200
Merge pull request #678 from potato/master
[engine] dictzone + mymemory.translated engine
Diffstat:
4 files changed, 167 insertions(+), 0 deletions(-)
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
@@ -0,0 +1,70 @@
+"""
+ Dictzone
+
+ @website https://dictzone.com/
+ @provide-api no
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+"""
+
+import re
+from urlparse import urljoin
+from lxml import html
+from cgi import escape
+from searx.engines.xpath import extract_text
+from searx.utils import is_valid_lang
+
+# Engine configuration for the Dictzone scraper.
+categories = ['general']
+# URL template filled in by request(): language names plus the looked-up word.
+url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+# NOTE(review): presumably consumed by the searx engine loader/ranking -
+# nothing in this module reads it; confirm the meaning of the value.
+weight = 100
+
+# Matches a query that ends in "<from>-<to> <word>": two alphabetic language
+# tokens joined by a hyphen, one space, then a single token without spaces.
+# Anything in front of the language pair is skipped; matching ignores case.
+parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+# Selects the rows of the table with id "r" in the fetched page.
+results_xpath = './/table[@id="r"]/tr'
+
+
+def request(query, params):
+ m = parser_re.match(unicode(query, 'utf8'))
+ if not m:
+ return params
+
+ from_lang, to_lang, query = m.groups()
+
+ from_lang = is_valid_lang(from_lang)
+ to_lang = is_valid_lang(to_lang)
+
+ if not from_lang or not to_lang:
+ return params
+
+ params['url'] = url.format(from_lang=from_lang[2],
+ to_lang=to_lang[2],
+ query=query)
+
+ return params
+
+
+def response(resp):
+ results = []
+
+ dom = html.fromstring(resp.text)
+
+ for k, result in enumerate(dom.xpath(results_xpath)[1:]):
+ try:
+ from_result, to_results_raw = result.xpath('./td')
+ except:
+ continue
+
+ to_results = []
+ for to_result in to_results_raw.xpath('./p/a'):
+ t = to_result.text_content()
+ if t.strip():
+ to_results.append(to_result.text_content())
+
+ results.append({
+ 'url': urljoin(resp.url, '?%d' % k),
+ 'title': escape(from_result.text_content()),
+ 'content': escape('; '.join(to_results))
+ })
+
+ return results
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
@@ -0,0 +1,69 @@
+"""
+ MyMemory Translated
+
+ @website https://mymemory.translated.net/
+ @provide-api yes (https://mymemory.translated.net/doc/spec.php)
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content
+"""
+import re
+from urlparse import urljoin
+from lxml import html
+from cgi import escape
+from searx.engines.xpath import extract_text
+from searx.utils import is_valid_lang
+
+# Engine configuration for the MyMemory translation API.
+categories = ['general']
+# API endpoint template; ``{key}`` is either empty or a complete
+# ``&key=...`` parameter (see request()).
+url = 'http://api.mymemory.translated.net/get?q={query}' \
+ '&langpair={from_lang}|{to_lang}{key}'
+# Template of the result link produced by response().
+web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+# NOTE(review): presumably consumed by the searx engine loader/ranking -
+# nothing in this module reads it; confirm the meaning of the value.
+weight = 100
+
+# Matches a query that ends in "<from>-<to> <text>": two alphabetic language
+# tokens joined by a hyphen, one space, then at least two characters of text
+# to translate.  Matching ignores case.
+parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
+# Optional API key; when empty, request() sends no ``key`` parameter.
+api_key = ''
+
+
+def request(query, params):
+ m = parser_re.match(unicode(query, 'utf8'))
+ if not m:
+ return params
+
+ from_lang, to_lang, query = m.groups()
+
+ from_lang = is_valid_lang(from_lang)
+ to_lang = is_valid_lang(to_lang)
+
+ if not from_lang or not to_lang:
+ return params
+
+ if api_key:
+ key_form = '&key=' + api_key
+ else:
+ key_form = ''
+ params['url'] = url.format(from_lang=from_lang[1],
+ to_lang=to_lang[1],
+ query=query,
+ key=key_form)
+ params['query'] = query
+ params['from_lang'] = from_lang
+ params['to_lang'] = to_lang
+
+ return params
+
+
+def response(resp):
+ results = []
+ results.append({
+ 'url': escape(web_url.format(
+ from_lang=resp.search_params['from_lang'][2],
+ to_lang=resp.search_params['to_lang'][2],
+ query=resp.search_params['query'])),
+ 'title': escape('[{0}-{1}] {2}'.format(
+ resp.search_params['from_lang'][1],
+ resp.search_params['to_lang'][1],
+ resp.search_params['query'])),
+ 'content': escape(resp.json()['responseData']['translatedText'])
+ })
+ return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -495,6 +495,19 @@ engines:
timeout: 6.0
categories : science
+ - name : dictzone
+ engine : dictzone
+ shortcut : dc
+
+ - name : mymemory translated
+ engine : translated
+ shortcut : tl
+ timeout : 5.0
+ disabled : True
+ # Can be used without an API key, but then you are limited to 1000 words/day
+ # See : http://mymemory.translated.net/doc/usagelimits.php
+ # api_key : ''
+
#The blekko technology and team have joined IBM Watson! -> https://blekko.com/
# - name : blekko images
# engine : blekko_images
diff --git a/searx/utils.py b/searx/utils.py
@@ -9,6 +9,7 @@ from HTMLParser import HTMLParser
from random import choice
from searx.version import VERSION_STRING
+from searx.languages import language_codes
from searx import settings
from searx import logger
@@ -255,3 +256,17 @@ def get_torrent_size(filesize, filesize_multiplier):
filesize = None
return filesize
+
+
+def is_valid_lang(lang):
+ is_abbr = (len(lang) == 2)
+ if is_abbr:
+ for l in language_codes:
+ if l[0][:2] == lang.lower():
+ return (True, l[0][:2], l[1].lower())
+ return False
+ else:
+ for l in language_codes:
+ if l[1].lower() == lang.lower():
+ return (True, l[0][:2], l[1].lower())
+ return False