logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git

autocomplete.py (6585B)


  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. from lxml import etree
  15. from json import loads
  16. from searx import settings
  17. from searx.languages import language_codes
  18. from searx.engines import (
  19. categories, engines, engine_shortcuts
  20. )
  21. from searx.poolrequests import get as http_get
  22. from searx.url_utils import urlencode
  23. def get(*args, **kwargs):
  24. if 'timeout' not in kwargs:
  25. kwargs['timeout'] = settings['outgoing']['request_timeout']
  26. return http_get(*args, **kwargs)
  27. def searx_bang(full_query):
  28. '''check if the searchQuery contain a bang, and create fitting autocompleter results'''
  29. # check if there is a query which can be parsed
  30. if len(full_query.getSearchQuery()) == 0:
  31. return []
  32. results = []
  33. # check if current query stats with !bang
  34. first_char = full_query.getSearchQuery()[0]
  35. if first_char == '!' or first_char == '?':
  36. if len(full_query.getSearchQuery()) == 1:
  37. # show some example queries
  38. # TODO, check if engine is not avaliable
  39. results.append(first_char + "images")
  40. results.append(first_char + "wikipedia")
  41. results.append(first_char + "osm")
  42. else:
  43. engine_query = full_query.getSearchQuery()[1:]
  44. # check if query starts with categorie name
  45. for categorie in categories:
  46. if categorie.startswith(engine_query):
  47. results.append(first_char + '{categorie}'.format(categorie=categorie))
  48. # check if query starts with engine name
  49. for engine in engines:
  50. if engine.startswith(engine_query.replace('_', ' ')):
  51. results.append(first_char + '{engine}'.format(engine=engine.replace(' ', '_')))
  52. # check if query starts with engine shortcut
  53. for engine_shortcut in engine_shortcuts:
  54. if engine_shortcut.startswith(engine_query):
  55. results.append(first_char + '{engine_shortcut}'.format(engine_shortcut=engine_shortcut))
  56. # check if current query stats with :bang
  57. elif first_char == ':':
  58. if len(full_query.getSearchQuery()) == 1:
  59. # show some example queries
  60. results.append(":en")
  61. results.append(":en_us")
  62. results.append(":english")
  63. results.append(":united_kingdom")
  64. else:
  65. engine_query = full_query.getSearchQuery()[1:]
  66. for lc in language_codes:
  67. lang_id, lang_name, country, english_name = map(unicode.lower, lc)
  68. # check if query starts with language-id
  69. if lang_id.startswith(engine_query):
  70. if len(engine_query) <= 2:
  71. results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
  72. else:
  73. results.append(u':{lang_id}'.format(lang_id=lang_id))
  74. # check if query starts with language name
  75. if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
  76. results.append(u':{lang_name}'.format(lang_name=lang_name))
  77. # check if query starts with country
  78. if country.startswith(engine_query.replace('_', ' ')):
  79. results.append(u':{country}'.format(country=country.replace(' ', '_')))
  80. # remove duplicates
  81. result_set = set(results)
  82. # remove results which are already contained in the query
  83. for query_part in full_query.query_parts:
  84. if query_part in result_set:
  85. result_set.remove(query_part)
  86. # convert result_set back to list
  87. return list(result_set)
  88. def dbpedia(query, lang):
  89. # dbpedia autocompleter, no HTTPS
  90. autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
  91. response = get(autocomplete_url + urlencode(dict(QueryString=query)))
  92. results = []
  93. if response.ok:
  94. dom = etree.fromstring(response.content)
  95. results = dom.xpath('//a:Result/a:Label//text()',
  96. namespaces={'a': 'http://lookup.dbpedia.org/'})
  97. return results
  98. def duckduckgo(query, lang):
  99. # duckduckgo autocompleter
  100. url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
  101. resp = loads(get(url.format(urlencode(dict(q=query)))).text)
  102. if len(resp) > 1:
  103. return resp[1]
  104. return []
  105. def google(query, lang):
  106. # google autocompleter
  107. autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
  108. response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
  109. results = []
  110. if response.ok:
  111. dom = etree.fromstring(response.text)
  112. results = dom.xpath('//suggestion/@data')
  113. return results
  114. def startpage(query, lang):
  115. # startpage autocompleter
  116. url = 'https://startpage.com/do/suggest?{query}'
  117. resp = get(url.format(query=urlencode({'query': query}))).text.split('\n')
  118. if len(resp) > 1:
  119. return resp
  120. return []
  121. def qwant(query, lang):
  122. # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
  123. url = 'https://api.qwant.com/api/suggest?{query}'
  124. resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))
  125. results = []
  126. if resp.ok:
  127. data = loads(resp.text)
  128. if data['status'] == 'success':
  129. for item in data['data']['items']:
  130. results.append(item['value'])
  131. return results
  132. def wikipedia(query, lang):
  133. # wikipedia autocompleter
  134. url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'
  135. resp = loads(get(url.format(urlencode(dict(search=query)))).text)
  136. if len(resp) > 1:
  137. return resp[1]
  138. return []
  139. backends = {'dbpedia': dbpedia,
  140. 'duckduckgo': duckduckgo,
  141. 'google': google,
  142. 'startpage': startpage,
  143. 'qwant': qwant,
  144. 'wikipedia': wikipedia
  145. }