logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git

reddit.py (2071B)


  1. """
  2. Reddit
  3. @website https://www.reddit.com/
  4. @provide-api yes (https://www.reddit.com/dev/api)
  5. @using-api yes
  6. @results JSON
  7. @stable yes
  8. @parse url, title, content, thumbnail, publishedDate
  9. """
  10. import json
  11. from datetime import datetime
  12. from searx.url_utils import urlencode, urljoin, urlparse
  13. # engine dependent config
  14. categories = ['general', 'images', 'news', 'social media']
  15. page_size = 25
  16. # search-url
  17. base_url = 'https://www.reddit.com/'
  18. search_url = base_url + 'search.json?{query}'
  19. # do search-request
  20. def request(query, params):
  21. query = urlencode({'q': query, 'limit': page_size})
  22. params['url'] = search_url.format(query=query)
  23. return params
  24. # get response from search-request
  25. def response(resp):
  26. img_results = []
  27. text_results = []
  28. search_results = json.loads(resp.text)
  29. # return empty array if there are no results
  30. if 'data' not in search_results:
  31. return []
  32. posts = search_results.get('data', {}).get('children', [])
  33. # process results
  34. for post in posts:
  35. data = post['data']
  36. # extract post information
  37. params = {
  38. 'url': urljoin(base_url, data['permalink']),
  39. 'title': data['title']
  40. }
  41. # if thumbnail field contains a valid URL, we need to change template
  42. thumbnail = data['thumbnail']
  43. url_info = urlparse(thumbnail)
  44. # netloc & path
  45. if url_info[1] != '' and url_info[2] != '':
  46. params['img_src'] = data['url']
  47. params['thumbnail_src'] = thumbnail
  48. params['template'] = 'images.html'
  49. img_results.append(params)
  50. else:
  51. created = datetime.fromtimestamp(data['created_utc'])
  52. content = data['selftext']
  53. if len(content) > 500:
  54. content = content[:500] + '...'
  55. params['content'] = content
  56. params['publishedDate'] = created
  57. text_results.append(params)
  58. # show images first and text results second
  59. return img_results + text_results