[fix] make search requests on wikidata more accurate - searx - My custom branch(es) on searx, a meta-search engine

commit: b12857a70dd947a804e667d864ba56055b528ee0
parent 835d1edd5834c3c8117dc4614cb0b0b4316d3153
Author: Marc Abonce Seguin <marc-abonce@mailbox.org>
Date:   Sun,  8 Apr 2018 21:17:00 -0500

[fix] make search requests on wikidata more accurate

Diffstat:
M searx/engines/wikidata.py 13 ++++++-------
M searx/settings.yml 1 +

2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
@@ -27,7 +27,7 @@ result_count = 1
 # urls
 wikidata_host = 'https://www.wikidata.org'
 url_search = wikidata_host \
-    + '/wiki/Special:ItemDisambiguation?{query}'
+    + '/w/index.php?{query}'
 
 wikidata_api = wikidata_host + '/w/api.php'
 url_detail = wikidata_api\
@@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
 url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
 
 # xpaths
-wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
+wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
 title_xpath = '//*[contains(@class,"wikibase-title-label")]'
 description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
 property_xpath = '//div[@id="{propertyid}"]'
@@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
 
 
 def request(query, params):
-    language = match_language(params['language'], supported_languages).split('-')[0]
-
     params['url'] = url_search.format(
-        query=urlencode({'label': query, 'language': language}))
+        query=urlencode({'search': query}))
     return params
 
 
 def response(resp):
     results = []
     html = fromstring(resp.text)
-    wikidata_ids = html.xpath(wikidata_ids_xpath)
+    search_results = html.xpath(wikidata_ids_xpath)
 
     language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
-    for wikidata_id in wikidata_ids[:result_count]:
+    for search_result in search_results[:result_count]:
+        wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
         jsonresponse = loads(htmlresponse.text)
diff --git a/searx/settings.yml b/searx/settings.yml
@@ -174,6 +174,7 @@ engines:
   - name : wikidata
     engine : wikidata
     shortcut : wd
+    timeout : 3.0
     weight : 2
 
   - name : duckduckgo

M	searx/engines/wikidata.py	13	++++++-------
M	searx/settings.yml	1	+