logo

searx

My custom branch(es) on searx, a meta-search engine
commit: 74b6be3991dc62577aca295de839e51e6d0807d6
parent: 39d229e1104dc10c7c7f00380c02d46118e3d895
Author: asciimoo <asciimoo@gmail.com>
Date:   Wed, 23 Oct 2013 23:55:37 +0200

[enh] engine cfg compatibility

Diffstat:

M searx/engines/duckduckgo.py             |  5 ++++-
M searx/engines/duckduckgo_definitions.py |  5 ++++-
M searx/engines/flickr.py                 | 14 ++++++--------
M searx/engines/github.py                 |  4 ++--
M searx/engines/google_images.py          |  9 ++++-----
M searx/engines/piratebay.py              | 13 +++++--------
M searx/engines/soundcloud.py             |  6 ++++--
M searx/engines/stackoverflow.py          | 10 ++++------
M searx/engines/youtube.py                |  9 +++------
9 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py @@ -1,8 +1,11 @@ from json import loads +from urllib import urlencode +url = 'https://duckduckgo.com/' +search_url = url + 'd.js?{query}&l=us-en&p=1&s=0' def request(query, params): - params['url'] = 'https://duckduckgo.com/d.js?q=%s&l=us-en&p=1&s=0' % query + params['url'] = search_url.format(query=urlencode({'q': query})) return params diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py @@ -1,7 +1,10 @@ import json +from urllib import urlencode + +url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0' def request(query, params): - params['url'] = 'http://api.duckduckgo.com/?q=%s&format=json&pretty=0' % query + params['url'] = url.format(query=urlencode({'q': query})) return params diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py @@ -1,18 +1,16 @@ #!/usr/bin/env python -from urllib import quote +from urllib import urlencode from lxml import html from urlparse import urljoin categories = ['images'] -base_url = 'https://secure.flickr.com/' -search_url = base_url+'search/?q=' +url = 'https://secure.flickr.com/' +search_url = url+'search/?q={query}' def request(query, params): - global search_url - query = quote(query.replace(' ', '+'), safe='+') - params['url'] = search_url + query + params['url'] = search_url.format(query=urlencode({'q': query})) return params def response(resp): @@ -20,11 +18,11 @@ def response(resp): results = [] dom = html.fromstring(resp.text) for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'): - url = urljoin(base_url, result.attrib.get('href')) + href = urljoin(url, result.attrib.get('href')) img = result.xpath('.//img')[0] title = img.attrib.get('alt', '') img_src = img.attrib.get('data-defer-src') if not img_src: continue - results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'}) + results.append({'url': 
href, 'title': title, 'img_src': img_src, 'template': 'images.html'}) return results diff --git a/searx/engines/github.py b/searx/engines/github.py @@ -4,11 +4,11 @@ from cgi import escape categories = ['it'] -search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&' +search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' def request(query, params): global search_url - params['url'] = search_url + urlencode({'q': query}) + params['url'] = search_url.format(query=urlencode({'q': query})) params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json' return params diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py @@ -5,15 +5,14 @@ from json import loads categories = ['images'] -search_url = 'https://ajax.googleapis.com/ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&' +url = 'https://ajax.googleapis.com/' +search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}' def request(query, params): - global search_url params['url'] = search_url + urlencode({'q': query}) return params def response(resp): - global base_url results = [] search_res = loads(resp.text) if not search_res.get('responseData'): @@ -21,9 +20,9 @@ def response(resp): if not search_res['responseData'].get('results'): return [] for result in search_res['responseData']['results']: - url = result['originalContextUrl'] + href = result['originalContextUrl'] title = result['title'] if not result['url']: continue - results.append({'url': url, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'}) + results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'}) return results diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py @@ -5,21 +5,18 @@ from urllib import quote categories = ['videos', 'music'] -base_url = 'https://thepiratebay.sx/' -search_url = 
base_url + 'search/{search_term}/0/99/{search_type}' +url = 'https://thepiratebay.sx/' +search_url = url + 'search/{search_term}/0/99/{search_type}' search_types = {'videos': '200' ,'music' : '100' } def request(query, params): - global search_url, search_types - # 200 is the video category params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category'])) return params def response(resp): - global base_url results = [] dom = html.fromstring(resp.text) search_res = dom.xpath('//table[@id="searchResult"]//tr') @@ -27,12 +24,12 @@ def response(resp): return results for result in search_res[1:]: link = result.xpath('.//div[@class="detName"]//a')[0] - url = urljoin(base_url, link.attrib.get('href')) + href = urljoin(url, link.attrib.get('href')) title = ' '.join(link.xpath('.//text()')) content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()'))) seed, leech = result.xpath('.//td[@align="right"]/text()')[:2] content += '<br />Seed: %s, Leech: %s' % (seed, leech) magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0] - content += '<br /><a href="%s">magnet link</a>' % urljoin(base_url, magnetlink.attrib['href']) - results.append({'url': url, 'title': title, 'content': content}) + content += '<br /><a href="%s">magnet link</a>' % urljoin(url, magnetlink.attrib['href']) + results.append({'url': href, 'title': title, 'content': content}) return results diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py @@ -1,13 +1,15 @@ from json import loads +from urllib import urlencode categories = ['music'] guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28' -search_url = 'https://api.soundcloud.com/search?q=%s&facet=model&limit=10&offset=0&linked_partitioning=1&client_id='+guest_client_id +url = 'https://api.soundcloud.com/' +search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id def request(query, params): global 
search_url - params['url'] = search_url % query + params['url'] = search_url.format(query=urlencode({'q': query})) return params diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py @@ -5,23 +5,21 @@ from urllib import urlencode categories = ['it'] -base_url = 'http://stackoverflow.com/' -search_url = base_url+'search?' +url = 'http://stackoverflow.com/' +search_url = url+'search?' def request(query, params): - global search_url params['url'] = search_url + urlencode({'q': query}) return params def response(resp): - global base_url results = [] dom = html.fromstring(resp.text) for result in dom.xpath('//div[@class="question-summary search-result"]'): link = result.xpath('.//div[@class="result-link"]//a')[0] - url = urljoin(base_url, link.attrib.get('href')) + href = urljoin(url, link.attrib.get('href')) title = ' '.join(link.xpath('.//text()')) content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()'))) - results.append({'url': url, 'title': title, 'content': content}) + results.append({'url': href, 'title': title, 'content': content}) return results diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py @@ -1,15 +1,12 @@ from json import loads -from urllib import quote +from urllib import urlencode categories = ['videos'] -search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&q=' +search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}' def request(query, params): - global search_url - query = quote(query.replace(' ', '+'), safe='+') - params['url'] = search_url + query - + params['url'] = search_url.format(query=urlencode({'q': query})) return params