logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: 78be030500526ad073b1291baec6bfbf36eb221d
parent b786ac9a6360cc50472d572148343dc84b908830
Author: cy8aer <cybaer42@web.de>
Date:   Mon,  9 Jul 2018 15:34:43 +0200

Merge branch 'master' into dsgvo

Diffstat:

M searx/engines/bing.py | 5 ++++-
M searx/engines/wikidata.py | 3 ++-
M searx/utils.py | 21 +++++++++++----------
M tests/unit/engines/test_wikidata.py | 21 ++++++++++++---------
4 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/searx/engines/bing.py b/searx/engines/bing.py @@ -16,7 +16,7 @@ from lxml import html from searx.engines.xpath import extract_text from searx.url_utils import urlencode -from searx.utils import match_language +from searx.utils import match_language, gen_useragent # engine dependent config categories = ['general'] @@ -43,6 +43,9 @@ def request(query, params): offset=offset) params['url'] = base_url + search_path + + params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64') + return params diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py @@ -54,6 +54,7 @@ value_xpath = './/div[contains(@class,"wikibase-statementview-mainsnak")]'\ + '/*/div[contains(@class,"wikibase-snakview-value")]' language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator")]' calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' +media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a' def request(query, params): @@ -313,7 +314,7 @@ def add_image(result): for property_id in property_ids: image = result.xpath(property_xpath.replace('{propertyid}', property_id)) if image: - image_name = image[0].xpath(value_xpath) + image_name = image[0].xpath(media_xpath) image_src = url_image.replace('{filename}', extract_text(image_name[0])) return image_src diff --git a/searx/utils.py b/searx/utils.py @@ -39,14 +39,15 @@ else: logger = logger.getChild('utils') -ua_versions = ('40.0', - '41.0', - '42.0', - '43.0', - '44.0', - '45.0', - '46.0', - '47.0') +ua_versions = ('52.8.1', + '53.0', + '54.0', + '55.0', + '56.0', + '57.0', + '58.0', + '59.0', + '60.0.2') ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64', @@ -58,9 +59,9 @@ blocked_tags = ('script', 'style') -def gen_useragent(): +def gen_useragent(os=None): # TODO - return ua.format(os=choice(ua_os), version=choice(ua_versions)) + return ua.format(os=os or choice(ua_os), version=choice(ua_versions)) def searx_useragent(): diff --git 
a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py @@ -123,9 +123,10 @@ class TestWikidataEngine(SearxTestCase): <div class="wikibase-statementview-mainsnak"> <div> <div class="wikibase-snakview-value"> - <a href="https://commons.wikimedia.org/wiki/File:image.png"> - image.png - </a> + <div class="commons-media-caption"> + <a href="https://commons.wikimedia.org/wiki/File:image.png">image.png</a> + <br/>2,687 &#215; 3,356; 1.22 MB + </div> </div> </div> </div> @@ -156,9 +157,10 @@ class TestWikidataEngine(SearxTestCase): <div class="wikibase-statementview-mainsnak"> <div> <div class="wikibase-snakview-value"> - <a href="https://commons.wikimedia.org/wiki/File:icon.png"> - icon.png - </a> + <div class="commons-media-caption"> + <a href="https://commons.wikimedia.org/wiki/File:icon.png">icon.png</a> + <br/>671 &#215; 671; 18 KB</div> + </div> </div> </div> </div> @@ -179,9 +181,10 @@ class TestWikidataEngine(SearxTestCase): <div class="wikibase-statementview-mainsnak"> <div> <div class="wikibase-snakview-value"> - <a href="https://commons.wikimedia.org/wiki/File:logo.png"> - logo.png - </a> + <div class="commons-media-caption"> + <a href="https://commons.wikimedia.org/wiki/File:logo.png">logo.png</a> + <br/>170 &#215; 170; 1 KB + </div> </div> </div> </div>