logo

searx

My custom branch(es) on searx, a meta-search engine
commit: e23c8f954b6ae0f1100167d39b73e2037384c154
parent: 6243639f013b18fbdf6d667895f32fda6048d8f8
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Tue,  1 Nov 2016 20:01:51 +0100

Merge pull request #746 from kvch/moar-time-range-support

Support time range search in more engines

Diffstat:

M searx/engines/bing_images.py | 7 +++++++
M searx/engines/bing_news.py | 24 +++++++++++++++++++-----
M searx/engines/flickr_noapi.py | 17 ++++++++++++++---
M searx/engines/youtube_noapi.py | 7 +++++++
M searx/templates/oscar/preferences.html | 2 ++
M tests/unit/engines/test_bing_images.py | 1 +
M tests/unit/engines/test_bing_news.py | 1 +
M tests/unit/engines/test_flickr_noapi.py | 1 +
M tests/unit/engines/test_youtube_noapi.py | 1 +
9 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py @@ -24,11 +24,16 @@ import re categories = ['images'] paging = True safesearch = True +time_range_support = True # search-url base_url = 'https://www.bing.com/' search_string = 'images/search?{query}&count=10&first={offset}' +time_range_string = '&qft=+filterui:age-lt{interval}' thumb_url = "https://www.bing.com/th?id={ihk}" +time_range_dict = {'day': '1440', + 'week': '10080', + 'month': '43200'} # safesearch definitions safesearch_types = {2: 'STRICT', @@ -58,6 +63,8 @@ def request(query, params): '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') params['url'] = base_url + search_path + if params['time_range'] in time_range_dict: + params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) return params diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py @@ -22,10 +22,15 @@ from searx.utils import list_get categories = ['news'] paging = True language_support = True +time_range_support = True # search-url base_url = 'https://www.bing.com/' search_string = 'news/search?{query}&first={offset}&format=RSS' +search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS' +time_range_dict = {'day': '7', + 'week': '8', + 'month': '9'} # remove click @@ -46,6 +51,19 @@ def image_url_cleanup(url_string): return url_string +def _get_url(query, language, offset, time_range): + if time_range in time_range_dict: + search_path = search_string_with_time.format( + query=urlencode({'q': query, 'setmkt': language}), + offset=offset, + interval=time_range_dict[time_range]) + else: + search_path = search_string.format( + query=urlencode({'q': query, 'setmkt': language}), + offset=offset) + return base_url + search_path + + # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 @@ -55,11 +73,7 @@ def request(query, params): else: language = params['language'].replace('_', '-') - 
search_path = search_string.format( - query=urlencode({'q': query, 'setmkt': language}), - offset=offset) - - params['url'] = base_url + search_path + params['url'] = _get_url(query, language, offset, params['time_range']) return params diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py @@ -14,6 +14,7 @@ from urllib import urlencode from json import loads +from time import time import re from searx.engines import logger @@ -24,21 +25,31 @@ categories = ['images'] url = 'https://www.flickr.com/' search_url = url + 'search?{query}&page={page}' +time_range_url = '&min_upload_date={start}&max_upload_date={end}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') paging = True +time_range_support = True +time_range_dict = {'day': 60 * 60 * 24, + 'week': 60 * 60 * 24 * 7, + 'month': 60 * 60 * 24 * 7 * 4} def build_flickr_url(user_id, photo_id): return photo_url.format(userid=user_id, photoid=photo_id) -def request(query, params): - params['url'] = search_url.format(query=urlencode({'text': query}), - page=params['pageno']) +def _get_time_range_url(time_range): + if time_range in time_range_dict: + return time_range_url.format(start=time(), end=str(int(time()) - time_range_dict[time_range])) + return '' + +def request(query, params): + params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno']) + + _get_time_range_url(params['time_range'])) return params diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py @@ -17,10 +17,15 @@ from searx.utils import list_get categories = ['videos', 'music'] paging = True language_support = False +time_range_support = True # search-url base_url = 'https://www.youtube.com/results' search_url = base_url + '?search_query={query}&page={page}' +time_range_url = 
'&sp=EgII{time_range}%253D%253D' +time_range_dict = {'day': 'Ag', + 'week': 'Aw', + 'month': 'BA'} embedded_url = '<iframe width="540" height="304" ' +\ 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ @@ -47,6 +52,8 @@ def extract_text_from_dom(result, xpath): def request(query, params): params['url'] = search_url.format(query=quote_plus(query), page=params['pageno']) + if params['time_range'] in time_range_dict: + params['url'] += time_range_url.format(time_range=time_range_dict[params['time_range']]) return params diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html @@ -158,6 +158,7 @@ <th>{{ _("Engine name") }}</th> <th>{{ _("Shortcut") }}</th> <th>{{ _("SafeSearch") }}</th> + <th>{{ _("Time range") }}</th> <th>{{ _("Avg. time") }}</th> <th>{{ _("Max time") }}</th> {% else %} @@ -179,6 +180,7 @@ <th>{{ search_engine.name }}</th> <td>{{ shortcuts[search_engine.name] }}</td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> + <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td> <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td> {% else %} diff --git a/tests/unit/engines/test_bing_images.py b/tests/unit/engines/test_bing_images.py @@ -13,6 +13,7 @@ class TestBingImagesEngine(SearxTestCase): dicto['pageno'] = 1 dicto['language'] = 'fr_FR' dicto['safesearch'] = 1 + dicto['time_range'] = '' params = bing_images.request(query, dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py @@ -12,6 +12,7 @@ class 
TestBingNewsEngine(SearxTestCase): dicto = defaultdict(dict) dicto['pageno'] = 1 dicto['language'] = 'fr_FR' + dicto['time_range'] = '' params = bing_news.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) diff --git a/tests/unit/engines/test_flickr_noapi.py b/tests/unit/engines/test_flickr_noapi.py @@ -15,6 +15,7 @@ class TestFlickrNoapiEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 + dicto['time_range'] = '' params = flickr_noapi.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) diff --git a/tests/unit/engines/test_youtube_noapi.py b/tests/unit/engines/test_youtube_noapi.py @@ -11,6 +11,7 @@ class TestYoutubeNoAPIEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 + dicto['time_range'] = '' params = youtube_noapi.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url'])