commit: 78be030500526ad073b1291baec6bfbf36eb221d
parent b786ac9a6360cc50472d572148343dc84b908830
Author: cy8aer <cybaer42@web.de>
Date: Mon, 9 Jul 2018 15:34:43 +0200
Merge branch 'master' into dsgvo
Diffstat:
4 files changed, 29 insertions(+), 21 deletions(-)
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
@@ -16,7 +16,7 @@
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
-from searx.utils import match_language
+from searx.utils import match_language, gen_useragent
# engine dependent config
categories = ['general']
@@ -43,6 +43,9 @@ def request(query, params):
offset=offset)
params['url'] = base_url + search_path
+
+ params['headers']['User-Agent'] = gen_useragent('Windows NT 6.3; WOW64')
+
return params
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
@@ -54,6 +54,7 @@ value_xpath = './/div[contains(@class,"wikibase-statementview-mainsnak")]'\
+ '/*/div[contains(@class,"wikibase-snakview-value")]'
language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator")]'
calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
+media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a'
def request(query, params):
@@ -313,7 +314,7 @@ def add_image(result):
for property_id in property_ids:
image = result.xpath(property_xpath.replace('{propertyid}', property_id))
if image:
- image_name = image[0].xpath(value_xpath)
+ image_name = image[0].xpath(media_xpath)
image_src = url_image.replace('{filename}', extract_text(image_name[0]))
return image_src
diff --git a/searx/utils.py b/searx/utils.py
@@ -39,14 +39,15 @@ else:
logger = logger.getChild('utils')
-ua_versions = ('40.0',
- '41.0',
- '42.0',
- '43.0',
- '44.0',
- '45.0',
- '46.0',
- '47.0')
+ua_versions = ('52.8.1',
+ '53.0',
+ '54.0',
+ '55.0',
+ '56.0',
+ '57.0',
+ '58.0',
+ '59.0',
+ '60.0.2')
ua_os = ('Windows NT 6.3; WOW64',
'X11; Linux x86_64',
@@ -58,9 +59,9 @@ blocked_tags = ('script',
'style')
-def gen_useragent():
+def gen_useragent(os=None):
# TODO
- return ua.format(os=choice(ua_os), version=choice(ua_versions))
+ return ua.format(os=os or choice(ua_os), version=choice(ua_versions))
def searx_useragent():
diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py
@@ -123,9 +123,10 @@ class TestWikidataEngine(SearxTestCase):
<div class="wikibase-statementview-mainsnak">
<div>
<div class="wikibase-snakview-value">
- <a href="https://commons.wikimedia.org/wiki/File:image.png">
- image.png
- </a>
+ <div class="commons-media-caption">
+ <a href="https://commons.wikimedia.org/wiki/File:image.png">image.png</a>
+ <br/>2,687 × 3,356; 1.22 MB
+ </div>
</div>
</div>
</div>
@@ -156,9 +157,10 @@ class TestWikidataEngine(SearxTestCase):
<div class="wikibase-statementview-mainsnak">
<div>
<div class="wikibase-snakview-value">
- <a href="https://commons.wikimedia.org/wiki/File:icon.png">
- icon.png
- </a>
+ <div class="commons-media-caption">
+ <a href="https://commons.wikimedia.org/wiki/File:icon.png">icon.png</a>
+ <br/>671 × 671; 18 KB</div>
+ </div>
</div>
</div>
</div>
@@ -179,9 +181,10 @@ class TestWikidataEngine(SearxTestCase):
<div class="wikibase-statementview-mainsnak">
<div>
<div class="wikibase-snakview-value">
- <a href="https://commons.wikimedia.org/wiki/File:logo.png">
- logo.png
- </a>
+ <div class="commons-media-caption">
+ <a href="https://commons.wikimedia.org/wiki/File:logo.png">logo.png</a>
+ <br/>170 × 170; 1 KB
+ </div>
</div>
</div>
</div>