logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: 6042f2bc53d2b6f0d03e6b882db83377b27029be
parent 78828efdb0ea28efa057dbd82b240af1112f085a
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Sun,  1 Feb 2015 11:27:28 +0100

[enh] add 1x.com engine

* Deactivated by default, because of the large number of results

Diffstat:

A searx/engines/www1x.py | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/settings.yml | 5 +++++
2 files changed, 86 insertions(+), 0 deletions(-)

diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
@@ -0,0 +1,81 @@
+## 1x (Images)
+#
+# @website http://1x.com/
+# @provide-api no
+#
+# @using-api no
+# @results HTML
+# @stable no (HTML can change)
+# @parse url, title, thumbnail, img_src, content
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import string
+import re
+
+# engine dependent config
+categories = ['images']
+paging = False
+
+# search-url
+base_url = 'http://1x.com'
+search_url = base_url+'/backend/search.php?{query}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    # get links from result-text
+    results_parts = re.split(r'(</a>|<a)', resp.text)
+
+    cur_element = ''
+
+    # iterate over link parts
+    for result_part in results_parts:
+        # processed start and end of link
+        if result_part == '<a':
+            cur_element = result_part
+            continue
+        elif result_part != '</a>':
+            cur_element += result_part
+            continue
+
+        cur_element += result_part
+
+        # fix xml-error
+        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+
+        dom = html.fromstring(cur_element)
+        link = dom.xpath('//a')[0]
+
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = link.attrib.get('title', '')
+
+        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+        # TODO: get image with higher resolution
+        img_src = thumbnail_src
+
+        # check if url is showing to a photo
+        if '/photo/' not in url:
+            continue
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': '',
+                        'thumbnail_src': thumbnail_src,
+                        'template': 'images.html'})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -83,6 +83,11 @@ engines:
     engine : www500px
     shortcut : px
 
+  - name : 1x
+    engine : www1x
+    shortcut : 1x
+    disabled : True
+
   - name : flickr
     categories : images
     shortcut : fl