[enh] add 1x.com engine - searx - My custom branche(s) on searx, a meta-search engine

commit: 6042f2bc53d2b6f0d03e6b882db83377b27029be
parent 78828efdb0ea28efa057dbd82b240af1112f085a
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Sun,  1 Feb 2015 11:27:28 +0100

[enh] add 1x.com engine

* Deactivated by default, because of the large number of results

Diffstat:
A searx/engines/www1x.py 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml 5 +++++

2 files changed, 86 insertions(+), 0 deletions(-)
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
@@ -0,0 +1,81 @@
+## 1x (Images)
+#
+# @website     http://1x.com/
+# @provide-api no
+#
+# @using-api   no
+# @results     HTML
+# @stable      no (HTML can change)
+# @parse       url, title, thumbnail, img_src, content
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import string
+import re
+
+# engine dependent config: category list and paging flag (paging is disabled)
+categories = ['images']
+paging = False
+
+# search-url: '{query}' is replaced in request() by the url-encoded 'q' parameter
+base_url = 'http://1x.com'
+search_url = base_url+'/backend/search.php?{query}'
+
+
+# do search-request: store the search-url with the url-encoded query in params['url']
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request: parse resp.text into a list of image result dicts
+def response(resp):
+    results = []
+
+    # split result-text at '<a' and '</a>'; the capturing group keeps both delimiters as parts
+    results_parts = re.split(r'(</a>|<a)', resp.text)
+
+    cur_element = ''
+
+    # iterate over link parts and reassemble one '<a ...</a>' element at a time
+    for result_part in results_parts:
+        # '<a' starts a new element, other parts are appended until '</a>' closes it
+        if result_part == '<a':
+            cur_element = result_part
+            continue
+        elif result_part != '</a>':
+            cur_element += result_part
+            continue
+
+        cur_element += result_part
+
+        # fix xml-error: self-close the tag ending right before '</a>' (presumably the <img>)
+        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+
+        dom = html.fromstring(cur_element)
+        link = dom.xpath('//a')[0]
+
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = link.attrib.get('title', '')
+
+        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+        # TODO: get image with higher resolution
+        img_src = thumbnail_src
+
+        # skip non-photo links (NOTE(review): the img lookup above already raised if the link has no <img>)
+        if '/photo/' not in url:
+            continue
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': '',
+                        'thumbnail_src': thumbnail_src,
+                        'template': 'images.html'})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -83,6 +83,11 @@ engines:
     engine : www500px
     shortcut : px
 
+  - name : 1x
+    engine : www1x
+    shortcut : 1x
+    disabled : True
+
   - name : flickr
     categories : images
     shortcut : fl

A	searx/engines/www1x.py	81	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	searx/settings.yml	5	+++++