logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 0b3d632cd0f0fb81db8bc70957ce525ef428c0dd
parent: 667f4d5cfc4cb6bc0c2e47f230915d35e63bc3d2
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Sat, 20 Dec 2014 11:58:24 +0100

Merge pull request #158 from Cqoicebordel/Moar-Engines

Add 500px and Searchcode engines

Diffstat:

A searx/engines/500px.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A searx/engines/searchcode_code.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A searx/engines/searchcode_doc.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 12 ++++++++++++
4 files changed, 183 insertions(+), 0 deletions(-)

diff --git a/searx/engines/500px.py b/searx/engines/500px.py
@@ -0,0 +1,57 @@
+## 500px (Images)
+#
+# @website https://500px.com
+# @provide-api yes (https://developers.500px.com/)
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, thumbnail, img_src, content
+#
+# @todo rewrite to api
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+
+# engine dependent config
+categories = ['images']
+paging = True
+
+# search-url
+base_url = 'https://500px.com'
+search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(pageno=params['pageno'],
+                                      query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    # parse results
+    for result in dom.xpath('//div[@class="photo"]'):
+        link = result.xpath('.//a')[0]
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = result.xpath('.//div[@class="title"]//text()')[0]
+        img_src = link.xpath('.//img')[0].attrib['src']
+        content = result.xpath('.//div[@class="info"]//text()')[0]
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': content,
+                        'template': 'images.html'})
+
+    # return results
+    return results
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
@@ -0,0 +1,65 @@
+## Searchcode (It)
+#
+# @website https://searchcode.com/
+# @provide-api yes (https://searchcode.com/api/)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content
+
+from urllib import urlencode
+from json import loads
+import cgi
+import re
+
+# engine dependent config
+categories = ['it']
+paging = True
+
+# search-url
+url = 'https://searchcode.com/'
+search_url = url+'api/codesearch_I/?{query}&p={pageno}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      pageno=params['pageno']-1)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = loads(resp.text)
+
+    # parse results
+    for result in search_results['results']:
+        href = result['url']
+        title = "" + result['name'] + " - " + result['filename']
+        content = result['repo'] + "<br />"
+
+        lines = dict()
+        for line, code in result['lines'].items():
+            lines[int(line)] = code
+
+        content = content + '<pre class="code-formatter"><table class="code">'
+        for line, code in sorted(lines.items()):
+            content = content + '<tr><td class="line-number" style="padding-right:5px;">'
+            content = content + str(line) + '</td><td class="code-snippet">'
+            # Replace every two spaces with ' &nbps;' to keep formatting while allowing the browser to break the line if necessary
+            content = content + cgi.escape(code).replace('\t', ' ').replace(' ', '&nbsp; ').replace(' ', ' &nbsp;')
+            content = content + "</td></tr>"
+
+        content = content + "</table></pre>"
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
+    return results
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
@@ -0,0 +1,49 @@
+## Searchcode (It)
+#
+# @website https://searchcode.com/
+# @provide-api yes (https://searchcode.com/api/)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content
+
+from urllib import urlencode
+from json import loads
+
+# engine dependent config
+categories = ['it']
+paging = True
+
+# search-url
+url = 'https://searchcode.com/'
+search_url = url+'api/search_IV/?{query}&p={pageno}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      pageno=params['pageno']-1)
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = loads(resp.text)
+
+    # parse results
+    for result in search_results['results']:
+        href = result['url']
+        title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name']
+        content = '<span class="highlight">[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "</span><br />" + result['description']
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -64,6 +64,10 @@ engines:
 # engine : filecrop
 # categories : files
 # shortcut : fc
+
+  - name : 500px
+    engine : 500px
+    shortcut : px
 
   - name : flickr
     engine : flickr
@@ -114,6 +118,14 @@ engines:
   - name : stackoverflow
     engine : stackoverflow
     shortcut : st
+
+  - name : searchcode doc
+    engine : searchcode_doc
+    shortcut : scd
+
+  - name : searchcode code
+    engine : searchcode_code
+    shortcut : scc
 
   - name : startpage
     engine : startpage