commit: 0b3d632cd0f0fb81db8bc70957ce525ef428c0dd
parent: 667f4d5cfc4cb6bc0c2e47f230915d35e63bc3d2
Author: Adam Tauber <asciimoo@gmail.com>
Date: Sat, 20 Dec 2014 11:58:24 +0100
Merge pull request #158 from Cqoicebordel/Moar-Engines
Add 500px and Searchcode engines
Diffstat:
4 files changed, 183 insertions(+), 0 deletions(-)
diff --git a/searx/engines/500px.py b/searx/engines/500px.py
@@ -0,0 +1,57 @@
+## 500px (Images)
+#
+# @website https://500px.com
+# @provide-api yes (https://developers.500px.com/)
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, thumbnail, img_src, content
+#
+# @todo rewrite to api
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+
+# engine dependent config
+categories = ['images']
+paging = True
+
+# search-url
+base_url = 'https://500px.com'
+search_url = base_url+'/search?{query}&type=photos&page={pageno}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(pageno=params['pageno'],
+                                      query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath('//div[@class="photo"]'):
+        link = result.xpath('.//a')[0]
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = result.xpath('.//div[@class="title"]//text()')[0]
+        img_src = link.xpath('.//img')[0].attrib['src']
+        content = result.xpath('.//div[@class="info"]//text()')[0]
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': content,
+                        'template': 'images.html'})
+
+    # return results
+    return results
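
Since this engine scrapes the HTML search page instead of using the API, the request URL has to match what the site actually serves. As a quick illustration (not part of the commit; the query and page number are made up), this is what request() produces with the search_url above:

    from urllib import urlencode

    base_url = 'https://500px.com'
    search_url = base_url+'/search?{query}&type=photos&page={pageno}'

    params = {'pageno': 2}
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=urlencode({'q': 'sunset'}))
    print params['url']
    # https://500px.com/search?q=sunset&type=photos&page=2
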
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
@@ -0,0 +1,65 @@
+## Searchcode code (It)
+#
+# @website https://searchcode.com/
+# @provide-api yes (https://searchcode.com/api/)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content
+
+from urllib import urlencode
+from json import loads
+import cgi
+import re
+
+# engine dependent config
+categories = ['it']
+paging = True
+
+# search-url
+url = 'https://searchcode.com/'
+search_url = url+'api/codesearch_I/?{query}&p={pageno}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      pageno=params['pageno']-1)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = loads(resp.text)
+
+    # parse results
+    for result in search_results['results']:
+        href = result['url']
+        title = result['name'] + ' - ' + result['filename']
+        content = result['repo'] + '<br />'
+
+        lines = dict()
+        for line, code in result['lines'].items():
+            lines[int(line)] = code
+
+        content = content + '<pre class="code-formatter"><table class="code">'
+        for line, code in sorted(lines.items()):
+            content = content + '<tr><td class="line-number" style="padding-right:5px;">'
+            content = content + str(line) + '</td><td class="code-snippet">'
+            # replace every two spaces with '&nbsp; ' to keep the formatting
+            # while still letting the browser break the line if necessary
+            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ')
+            content = content + '</td></tr>'
+
+        content = content + '</table></pre>'
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
+    return results
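
The lines dict returned by the API maps line numbers (as strings) to source lines; the keys are cast to int before sorting, since sorting the strings would put '10' before '2'. A standalone sketch of the snippet-table step above, run against a hand-written stand-in for one API result (Python 2, like the engine itself):

    import cgi

    result = {'lines': {'12': 'def main():', '13': '\tprint "hi"'}}

    lines = dict()
    for line, code in result['lines'].items():
        lines[int(line)] = code

    content = '<pre class="code-formatter"><table class="code">'
    for line, code in sorted(lines.items()):
        content = content + '<tr><td class="line-number" style="padding-right:5px;">'
        content = content + str(line) + '</td><td class="code-snippet">'
        content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ')
        content = content + '</td></tr>'
    content = content + '</table></pre>'
    print content
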
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
@@ -0,0 +1,49 @@
+## Searchcode doc (It)
+#
+# @website https://searchcode.com/
+# @provide-api yes (https://searchcode.com/api/)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content
+
+from urllib import urlencode
+from json import loads
+
+# engine dependent config
+categories = ['it']
+paging = True
+
+# search-url
+url = 'https://searchcode.com/'
+search_url = url+'api/search_IV/?{query}&p={pageno}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      pageno=params['pageno']-1)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = loads(resp.text)
+
+    # parse results
+    for result in search_results['results']:
+        href = result['url']
+        title = '[' + result['type'] + '] ' + result['namespace'] +\
+                ' ' + result['name']
+        content = '<span class="highlight">[' + result['type'] + '] ' +\
+                  result['name'] + ' ' + result['synopsis'] +\
+                  '</span><br />' + result['description']
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
+    return results
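
Both searchcode engines pass params['pageno']-1 because searx numbers pages from 1 while the searchcode API's p parameter is zero-based. A sketch of the URL built for the first result page (the query string is illustrative, not from the commit):

    from urllib import urlencode

    url = 'https://searchcode.com/'
    search_url = url+'api/search_IV/?{query}&p={pageno}'

    pageno = 1  # searx's first page
    print search_url.format(query=urlencode({'q': 'quicksort'}),
                            pageno=pageno-1)
    # https://searchcode.com/api/search_IV/?q=quicksort&p=0
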
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -64,6 +64,10 @@ engines:
 #    engine : filecrop
 #    categories : files
 #    shortcut : fc
+
+  - name : 500px
+    engine : 500px
+    shortcut : px

   - name : flickr
     engine : flickr
@@ -114,6 +118,14 @@ engines:
   - name : stackoverflow
     engine : stackoverflow
     shortcut : st
+
+  - name : searchcode doc
+    engine : searchcode_doc
+    shortcut : scd
+
+  - name : searchcode code
+    engine : searchcode_code
+    shortcut : scc

   - name : startpage
     engine : startpage
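
With these entries in place, the new engines can be selected per query through their bang shortcuts, e.g. !px sunset for 500px or !scc quicksort for searchcode code (standard searx shortcut syntax; the example queries are made up).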