logo

searx

My custom branch(es) on searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git

yacy.py (3054B)


  1. # Yacy (Web, Images, Videos, Music, Files)
  2. #
  3. # @website http://yacy.net
  4. # @provide-api yes
  5. # (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
  6. #
  7. # @using-api yes
  8. # @results JSON
  9. # @stable yes
  10. # @parse (general) url, title, content, publishedDate
  11. # @parse (images) url, title, img_src
  12. #
  13. # @todo parse video, audio and file results
  14. from json import loads
  15. from dateutil import parser
  16. from searx.url_utils import urlencode
  17. from searx.utils import html_to_text
  18. # engine dependent config
  19. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  20. paging = True
  21. language_support = True
  22. number_of_results = 5
  23. # search-url
  24. base_url = 'http://localhost:8090'
  25. search_url = '/yacysearch.json?{query}'\
  26. '&startRecord={offset}'\
  27. '&maximumRecords={limit}'\
  28. '&contentdom={search_type}'\
  29. '&resource=global'
  30. # yacy specific type-definitions
  31. search_types = {'general': 'text',
  32. 'images': 'image',
  33. 'files': 'app',
  34. 'music': 'audio',
  35. 'videos': 'video'}
  36. # do search-request
  37. def request(query, params):
  38. offset = (params['pageno'] - 1) * number_of_results
  39. search_type = search_types.get(params.get('category'), '0')
  40. params['url'] = base_url +\
  41. search_url.format(query=urlencode({'query': query}),
  42. offset=offset,
  43. limit=number_of_results,
  44. search_type=search_type)
  45. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  46. return params
  47. # get response from search-request
  48. def response(resp):
  49. results = []
  50. raw_search_results = loads(resp.text)
  51. # return empty array if there are no results
  52. if not raw_search_results:
  53. return []
  54. search_results = raw_search_results.get('channels', [])
  55. if len(search_results) == 0:
  56. return []
  57. for result in search_results[0].get('items', []):
  58. # parse image results
  59. if result.get('image'):
  60. result_url = ''
  61. if 'url' in result:
  62. result_url = result['url']
  63. elif 'link' in result:
  64. result_url = result['link']
  65. else:
  66. continue
  67. # append result
  68. results.append({'url': result_url,
  69. 'title': result['title'],
  70. 'content': '',
  71. 'img_src': result['image'],
  72. 'template': 'images.html'})
  73. # parse general results
  74. else:
  75. publishedDate = parser.parse(result['pubDate'])
  76. # append result
  77. results.append({'url': result['link'],
  78. 'title': result['title'],
  79. 'content': html_to_text(result['description']),
  80. 'publishedDate': publishedDate})
  81. # TODO parse video, audio and file results
  82. # return results
  83. return results