
searx

My custom branch(es) on searx, a meta-search engine

git clone https://hacktivis.me/git/searx.git

subtitleseeker.py (2317B)


  1. """
  2. Subtitleseeker (Video)
  3. @website http://www.subtitleseeker.com
  4. @provide-api no
  5. @using-api no
  6. @results HTML
  7. @stable no (HTML can change)
  8. @parse url, title, content
  9. """
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
from searx.url_utils import quote_plus

# engine dependent config
categories = ['videos']
paging = True
language = ""  # optional fixed language name; when set it overrides the per-query language

# search-url
url = 'http://www.subtitleseeker.com/'
search_url = url + 'search/TITLES/{query}?p={pageno}'

# specific xpath variables
results_xpath = '//div[@class="boxRows"]'


# do search-request
def request(query, params):
    params['url'] = search_url.format(query=quote_plus(query),
                                      pageno=params['pageno'])

    return params


# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    search_lang = ""

    # dirty fix for languages named differently on their site
    if resp.search_params['language'][:2] == 'fa':
        search_lang = 'Farsi'
    elif resp.search_params['language'] == 'pt-BR':
        search_lang = 'Brazilian'
    else:
        # otherwise derive the site's language name from searx's language list
        search_lang = [lc[3]
                       for lc in language_codes
                       if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
        search_lang = search_lang[0].split(' (')[0]

    # parse results
    for result in dom.xpath(results_xpath):
        link = result.xpath(".//a")[0]
        href = link.attrib.get('href')

        if language != "":
            href = href + language + '/'
        elif search_lang:
            href = href + search_lang + '/'

        title = extract_text(link)

        content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
        content = content + " - "
        text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
        content = content + text

        if result.xpath(".//span") != []:
            content = content +\
                " - (" +\
                extract_text(result.xpath(".//span")) +\
                ")"

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content})

    # return results
    return results
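
For reference, a minimal sketch of how this engine pair is driven: searx calls request() to build the outgoing URL, fetches it, and hands the HTTP response to response() for parsing. This assumes a searx checkout where the module imports as searx.engines.subtitleseeker; FakeResponse is a hypothetical stand-in for the response object searx normally supplies (only .text and .search_params are needed here).

from searx.engines import subtitleseeker

class FakeResponse:
    def __init__(self, text, search_params):
        self.text = text                    # raw HTML of the fetched result page
        self.search_params = search_params  # the params dict used for the request

# build the search request; searx itself fills in 'pageno' and 'language'
params = subtitleseeker.request('dune', {'pageno': 1, 'language': 'en-US'})
print(params['url'])  # http://www.subtitleseeker.com/search/TITLES/dune?p=1

# feed fetched HTML back through response() to get the result dicts
page = ('<div class="boxRows"><a href="/title/1/">Dune</a>'
        '<div class="red">12 Subtitles</div>'
        '<div class="grey-web">Movie</div></div>')
results = subtitleseeker.response(FakeResponse(page, {'language': 'en-US'}))
for r in results:
    print(r['url'], r['title'], r['content'])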