logo

searx

My custom branch(es) on searx, a meta-search engine git clone https://hacktivis.me/git/searx.git

doku.py (2182B)


  1. # Doku Wiki
  2. #
  3. # @website https://www.dokuwiki.org/
  4. # @provide-api yes
  5. # (https://www.dokuwiki.org/devel:xmlrpc)
  6. #
  7. # @using-api no
  8. # @results HTML
  9. # @stable yes
  10. # @parse (general) url, title, content
  11. from lxml.html import fromstring
  12. from searx.engines.xpath import extract_text
  13. from searx.url_utils import urlencode
  14. # engine dependent config
  15. categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
  16. paging = False
  17. language_support = False
  18. number_of_results = 5
  19. # search-url
  20. # Doku is OpenSearch compatible
  21. base_url = 'http://localhost:8090'
  22. search_url = '/?do=search'\
  23. '&{query}'
  24. # TODO '&startRecord={offset}'\
  25. # TODO '&maximumRecords={limit}'\
  26. # do search-request
  27. def request(query, params):
  28. params['url'] = base_url +\
  29. search_url.format(query=urlencode({'id': query}))
  30. return params
  31. # get response from search-request
  32. def response(resp):
  33. results = []
  34. doc = fromstring(resp.text)
  35. # parse results
  36. # Quickhits
  37. for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
  38. try:
  39. res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
  40. except:
  41. continue
  42. if not res_url:
  43. continue
  44. title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
  45. # append result
  46. results.append({'title': title,
  47. 'content': "",
  48. 'url': base_url + res_url})
  49. # Search results
  50. for r in doc.xpath('//dl[@class="search_results"]/*'):
  51. try:
  52. if r.tag == "dt":
  53. res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1]
  54. title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
  55. elif r.tag == "dd":
  56. content = extract_text(r.xpath('.'))
  57. # append result
  58. results.append({'title': title,
  59. 'content': content,
  60. 'url': base_url + res_url})
  61. except:
  62. continue
  63. if not res_url:
  64. continue
  65. # return results
  66. return results