logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git

test_wikidata.py (21170B)


  1. # -*- coding: utf-8 -*-
  2. from lxml.html import fromstring
  3. from collections import defaultdict
  4. import mock
  5. from searx.engines import wikidata
  6. from searx.testing import SearxTestCase
  7. class TestWikidataEngine(SearxTestCase):
  8. def test_request(self):
  9. query = 'test_query'
  10. dicto = defaultdict(dict)
  11. params = wikidata.request(query, dicto)
  12. self.assertIn('url', params)
  13. self.assertIn(query, params['url'])
  14. self.assertIn('wikidata.org', params['url'])
  15. params = wikidata.request(query, dicto)
  16. self.assertIn(query, params['url'])
  17. # successful cases are not tested here to avoid sending additional requests
  18. def test_response(self):
  19. self.assertRaises(AttributeError, wikidata.response, None)
  20. self.assertRaises(AttributeError, wikidata.response, [])
  21. self.assertRaises(AttributeError, wikidata.response, '')
  22. self.assertRaises(AttributeError, wikidata.response, '[]')
  23. wikidata.supported_languages = ['en', 'es']
  24. response = mock.Mock(text='<html></html>', search_params={"language": "en"})
  25. self.assertEqual(wikidata.response(response), [])
  26. def test_getDetail(self):
  27. response = {}
  28. results = wikidata.getDetail(response, "Q123", "en", "en-US")
  29. self.assertEqual(results, [])
  30. title_html = '<div><div class="wikibase-title-label">Test</div></div>'
  31. html = """
  32. <div>
  33. <div class="wikibase-entitytermsview-heading-description">
  34. </div>
  35. <div>
  36. <ul class="wikibase-sitelinklistview-listview">
  37. <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
  38. </ul>
  39. </div>
  40. </div>
  41. """
  42. response = {"parse": {"displaytitle": title_html, "text": html}}
  43. results = wikidata.getDetail(response, "Q123", "en", "en-US")
  44. self.assertEqual(len(results), 1)
  45. self.assertEqual(results[0]['url'], 'https://en.wikipedia.org/wiki/Test')
  46. title_html = """
  47. <div>
  48. <div class="wikibase-title-label">
  49. <span lang="en">Test</span>
  50. <sup class="wb-language-fallback-indicator">English</sup>
  51. </div>
  52. </div>
  53. """
  54. html = """
  55. <div>
  56. <div class="wikibase-entitytermsview-heading-description">
  57. <span lang="en">Description</span>
  58. <sup class="wb-language-fallback-indicator">English</sup>
  59. </div>
  60. <div id="P856">
  61. <div class="wikibase-statementgroupview-property-label">
  62. <a href="/wiki/Property:P856">
  63. <span lang="en">official website</span>
  64. <sup class="wb-language-fallback-indicator">English</sup>
  65. </a>
  66. </div>
  67. <div class="wikibase-statementview-mainsnak">
  68. <a class="external free" href="https://officialsite.com">
  69. https://officialsite.com
  70. </a>
  71. </div>
  72. </div>
  73. <div>
  74. <ul class="wikibase-sitelinklistview-listview">
  75. <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
  76. </ul>
  77. </div>
  78. </div>
  79. """
  80. response = {"parse": {"displaytitle": title_html, "text": html}}
  81. results = wikidata.getDetail(response, "Q123", "yua", "yua_MX")
  82. self.assertEqual(len(results), 2)
  83. self.assertEqual(results[0]['title'], 'Official website')
  84. self.assertEqual(results[0]['url'], 'https://officialsite.com')
  85. self.assertEqual(results[1]['infobox'], 'Test')
  86. self.assertEqual(results[1]['id'], None)
  87. self.assertEqual(results[1]['content'], 'Description')
  88. self.assertEqual(results[1]['attributes'], [])
  89. self.assertEqual(results[1]['urls'][0]['title'], 'Official website')
  90. self.assertEqual(results[1]['urls'][0]['url'], 'https://officialsite.com')
  91. self.assertEqual(results[1]['urls'][1]['title'], 'Wikipedia (en)')
  92. self.assertEqual(results[1]['urls'][1]['url'], 'https://en.wikipedia.org/wiki/Test')
  93. def test_add_image(self):
  94. image_src = wikidata.add_image(fromstring("<div></div>"))
  95. self.assertEqual(image_src, None)
  96. html = u"""
  97. <div>
  98. <div id="P18">
  99. <div class="wikibase-statementgroupview-property-label">
  100. <a href="/wiki/Property:P18">
  101. image
  102. </a>
  103. </div>
  104. <div class="wikibase-statementlistview">
  105. <div class="wikibase-statementview listview-item">
  106. <div class="wikibase-statementview-rankselector">
  107. <span class="wikibase-rankselector-normal"></span>
  108. </div>
  109. <div class="wikibase-statementview-mainsnak">
  110. <div>
  111. <div class="wikibase-snakview-value">
  112. <div class="commons-media-caption">
  113. <a href="https://commons.wikimedia.org/wiki/File:image.png">image.png</a>
  114. <br/>2,687 &#215; 3,356; 1.22 MB
  115. </div>
  116. </div>
  117. </div>
  118. </div>
  119. </div>
  120. </div>
  121. </div>
  122. </div>
  123. """
  124. html_etree = fromstring(html)
  125. image_src = wikidata.add_image(html_etree)
  126. self.assertEqual(image_src,
  127. "https://commons.wikimedia.org/wiki/Special:FilePath/image.png?width=500&height=400")
  128. html = u"""
  129. <div>
  130. <div id="P2910">
  131. <div class="wikibase-statementgroupview-property-label">
  132. <a href="/wiki/Property:P2910">
  133. icon
  134. </a>
  135. </div>
  136. <div class="wikibase-statementlistview">
  137. <div class="wikibase-statementview listview-item">
  138. <div class="wikibase-statementview-rankselector">
  139. <span class="wikibase-rankselector-normal"></span>
  140. </div>
  141. <div class="wikibase-statementview-mainsnak">
  142. <div>
  143. <div class="wikibase-snakview-value">
  144. <div class="commons-media-caption">
  145. <a href="https://commons.wikimedia.org/wiki/File:icon.png">icon.png</a>
  146. <br/>671 &#215; 671; 18 KB</div>
  147. </div>
  148. </div>
  149. </div>
  150. </div>
  151. </div>
  152. </div>
  153. </div>
  154. <div id="P154">
  155. <div class="wikibase-statementgroupview-property-label">
  156. <a href="/wiki/Property:P154">
  157. logo
  158. </a>
  159. </div>
  160. <div class="wikibase-statementlistview">
  161. <div class="wikibase-statementview listview-item">
  162. <div class="wikibase-statementview-rankselector">
  163. <span class="wikibase-rankselector-normal"></span>
  164. </div>
  165. <div class="wikibase-statementview-mainsnak">
  166. <div>
  167. <div class="wikibase-snakview-value">
  168. <div class="commons-media-caption">
  169. <a href="https://commons.wikimedia.org/wiki/File:logo.png">logo.png</a>
  170. <br/>170 &#215; 170; 1 KB
  171. </div>
  172. </div>
  173. </div>
  174. </div>
  175. </div>
  176. </div>
  177. </div>
  178. </div>
  179. """
  180. html_etree = fromstring(html)
  181. image_src = wikidata.add_image(html_etree)
  182. self.assertEqual(image_src,
  183. "https://commons.wikimedia.org/wiki/Special:FilePath/logo.png?width=500&height=400")
  184. def test_add_attribute(self):
  185. html = u"""
  186. <div>
  187. <div id="P27">
  188. <div class="wikibase-statementgroupview-property-label">
  189. <a href="/wiki/Property:P27">
  190. country of citizenship
  191. </a>
  192. </div>
  193. <div class="wikibase-statementlistview">
  194. <div class="wikibase-statementview listview-item">
  195. <div class="wikibase-statementview-rankselector">
  196. <span class="wikibase-rankselector-normal"></span>
  197. </div>
  198. <div class="wikibase-statementview-mainsnak">
  199. <div>
  200. <div class="wikibase-snakview-value">
  201. <a href="/wiki/Q145">
  202. United Kingdom
  203. </a>
  204. </div>
  205. </div>
  206. </div>
  207. </div>
  208. </div>
  209. </div>
  210. </div>
  211. """
  212. attributes = []
  213. html_etree = fromstring(html)
  214. wikidata.add_attribute(attributes, html_etree, "Fail")
  215. self.assertEqual(attributes, [])
  216. wikidata.add_attribute(attributes, html_etree, "P27")
  217. self.assertEqual(len(attributes), 1)
  218. self.assertEqual(attributes[0]["label"], "Country of citizenship")
  219. self.assertEqual(attributes[0]["value"], "United Kingdom")
  220. html = u"""
  221. <div>
  222. <div id="P569">
  223. <div class="wikibase-statementgroupview-property-label">
  224. <a href="/wiki/Property:P569">
  225. date of birth
  226. </a>
  227. </div>
  228. <div class="wikibase-statementlistview">
  229. <div class="wikibase-statementview listview-item">
  230. <div class="wikibase-statementview-rankselector">
  231. <span class="wikibase-rankselector-normal"></span>
  232. </div>
  233. <div class="wikibase-statementview-mainsnak">
  234. <div>
  235. <div class="wikibase-snakview-value">
  236. 27 January 1832
  237. <sup class="wb-calendar-name">
  238. Gregorian
  239. </sup>
  240. </div>
  241. </div>
  242. </div>
  243. </div>
  244. </div>
  245. </div>
  246. </div>
  247. """
  248. attributes = []
  249. html_etree = fromstring(html)
  250. wikidata.add_attribute(attributes, html_etree, "P569", date=True)
  251. self.assertEqual(len(attributes), 1)
  252. self.assertEqual(attributes[0]["label"], "Date of birth")
  253. self.assertEqual(attributes[0]["value"], "27 January 1832")
  254. html = u"""
  255. <div>
  256. <div id="P6">
  257. <div class="wikibase-statementgroupview-property-label">
  258. <a href="/wiki/Property:P27">
  259. head of government
  260. </a>
  261. </div>
  262. <div class="wikibase-statementlistview">
  263. <div class="wikibase-statementview listview-item">
  264. <div class="wikibase-statementview-rankselector">
  265. <span class="wikibase-rankselector-normal"></span>
  266. </div>
  267. <div class="wikibase-statementview-mainsnak">
  268. <div>
  269. <div class="wikibase-snakview-value">
  270. <a href="/wiki/Q206">
  271. Old Prime Minister
  272. </a>
  273. </div>
  274. </div>
  275. </div>
  276. </div>
  277. <div class="wikibase-statementview listview-item">
  278. <div class="wikibase-statementview-rankselector">
  279. <span class="wikibase-rankselector-preferred"></span>
  280. </div>
  281. <div class="wikibase-statementview-mainsnak">
  282. <div>
  283. <div class="wikibase-snakview-value">
  284. <a href="/wiki/Q3099714">
  285. Actual Prime Minister
  286. </a>
  287. </div>
  288. </div>
  289. </div>
  290. </div>
  291. </div>
  292. </div>
  293. </div>
  294. """
  295. attributes = []
  296. html_etree = fromstring(html)
  297. wikidata.add_attribute(attributes, html_etree, "P6")
  298. self.assertEqual(len(attributes), 1)
  299. self.assertEqual(attributes[0]["label"], "Head of government")
  300. self.assertEqual(attributes[0]["value"], "Old Prime Minister, Actual Prime Minister")
  301. attributes = []
  302. html_etree = fromstring(html)
  303. wikidata.add_attribute(attributes, html_etree, "P6", trim=True)
  304. self.assertEqual(len(attributes), 1)
  305. self.assertEqual(attributes[0]["value"], "Actual Prime Minister")
  306. def test_add_url(self):
  307. html = u"""
  308. <div>
  309. <div id="P856">
  310. <div class="wikibase-statementgroupview-property-label">
  311. <a href="/wiki/Property:P856">
  312. official website
  313. </a>
  314. </div>
  315. <div class="wikibase-statementlistview">
  316. <div class="wikibase-statementview listview-item">
  317. <div class="wikibase-statementview-mainsnak">
  318. <div>
  319. <div class="wikibase-snakview-value">
  320. <a class="external free" href="https://searx.me">
  321. https://searx.me/
  322. </a>
  323. </div>
  324. </div>
  325. </div>
  326. </div>
  327. </div>
  328. </div>
  329. </div>
  330. """
  331. urls = []
  332. html_etree = fromstring(html)
  333. wikidata.add_url(urls, html_etree, 'P856')
  334. self.assertEquals(len(urls), 1)
  335. self.assertIn({'title': 'Official website', 'url': 'https://searx.me/'}, urls)
  336. urls = []
  337. results = []
  338. wikidata.add_url(urls, html_etree, 'P856', 'custom label', results=results)
  339. self.assertEquals(len(urls), 1)
  340. self.assertEquals(len(results), 1)
  341. self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, urls)
  342. self.assertIn({'title': 'custom label', 'url': 'https://searx.me/'}, results)
  343. html = u"""
  344. <div>
  345. <div id="P856">
  346. <div class="wikibase-statementgroupview-property-label">
  347. <a href="/wiki/Property:P856">
  348. official website
  349. </a>
  350. </div>
  351. <div class="wikibase-statementlistview">
  352. <div class="wikibase-statementview listview-item">
  353. <div class="wikibase-statementview-mainsnak">
  354. <div>
  355. <div class="wikibase-snakview-value">
  356. <a class="external free" href="http://www.worldofwarcraft.com">
  357. http://www.worldofwarcraft.com
  358. </a>
  359. </div>
  360. </div>
  361. </div>
  362. </div>
  363. <div class="wikibase-statementview listview-item">
  364. <div class="wikibase-statementview-mainsnak">
  365. <div>
  366. <div class="wikibase-snakview-value">
  367. <a class="external free" href="http://eu.battle.net/wow/en/">
  368. http://eu.battle.net/wow/en/
  369. </a>
  370. </div>
  371. </div>
  372. </div>
  373. </div>
  374. </div>
  375. </div>
  376. </div>
  377. """
  378. urls = []
  379. html_etree = fromstring(html)
  380. wikidata.add_url(urls, html_etree, 'P856')
  381. self.assertEquals(len(urls), 2)
  382. self.assertIn({'title': 'Official website', 'url': 'http://www.worldofwarcraft.com'}, urls)
  383. self.assertIn({'title': 'Official website', 'url': 'http://eu.battle.net/wow/en/'}, urls)
  384. def test_get_imdblink(self):
  385. html = u"""
  386. <div>
  387. <div class="wikibase-statementview-mainsnak">
  388. <div>
  389. <div class="wikibase-snakview-value">
  390. <a class="wb-external-id" href="http://www.imdb.com/tt0433664">
  391. tt0433664
  392. </a>
  393. </div>
  394. </div>
  395. </div>
  396. </div>
  397. """
  398. html_etree = fromstring(html)
  399. imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/')
  400. html = u"""
  401. <div>
  402. <div class="wikibase-statementview-mainsnak">
  403. <div>
  404. <div class="wikibase-snakview-value">
  405. <a class="wb-external-id"
  406. href="href="http://tools.wmflabs.org/...http://www.imdb.com/&id=nm4915994"">
  407. nm4915994
  408. </a>
  409. </div>
  410. </div>
  411. </div>
  412. </div>
  413. """
  414. html_etree = fromstring(html)
  415. imdblink = wikidata.get_imdblink(html_etree, 'https://www.imdb.com/')
  416. self.assertIn('https://www.imdb.com/name/nm4915994', imdblink)
  417. def test_get_geolink(self):
  418. html = u"""
  419. <div>
  420. <div class="wikibase-statementview-mainsnak">
  421. <div>
  422. <div class="wikibase-snakview-value">
  423. 60°N, 40°E
  424. </div>
  425. </div>
  426. </div>
  427. </div>
  428. """
  429. html_etree = fromstring(html)
  430. geolink = wikidata.get_geolink(html_etree)
  431. self.assertIn('https://www.openstreetmap.org/', geolink)
  432. self.assertIn('lat=60&lon=40', geolink)
  433. html = u"""
  434. <div>
  435. <div class="wikibase-statementview-mainsnak">
  436. <div>
  437. <div class="wikibase-snakview-value">
  438. 34°35'59"S, 58°22'55"W
  439. </div>
  440. </div>
  441. </div>
  442. </div>
  443. """
  444. html_etree = fromstring(html)
  445. geolink = wikidata.get_geolink(html_etree)
  446. self.assertIn('https://www.openstreetmap.org/', geolink)
  447. self.assertIn('lat=-34.59', geolink)
  448. self.assertIn('lon=-58.38', geolink)
  449. def test_get_wikilink(self):
  450. html = """
  451. <div>
  452. <div>
  453. <ul class="wikibase-sitelinklistview-listview">
  454. <li data-wb-siteid="arwiki"><a href="http://ar.wikipedia.org/wiki/Test">Test</a></li>
  455. <li data-wb-siteid="enwiki"><a href="http://en.wikipedia.org/wiki/Test">Test</a></li>
  456. </ul>
  457. </div>
  458. <div>
  459. <ul class="wikibase-sitelinklistview-listview">
  460. <li data-wb-siteid="enwikiquote"><a href="https://en.wikiquote.org/wiki/Test">Test</a></li>
  461. </ul>
  462. </div>
  463. </div>
  464. """
  465. html_etree = fromstring(html)
  466. wikilink = wikidata.get_wikilink(html_etree, 'nowiki')
  467. self.assertEqual(wikilink, None)
  468. wikilink = wikidata.get_wikilink(html_etree, 'enwiki')
  469. self.assertEqual(wikilink, 'https://en.wikipedia.org/wiki/Test')
  470. wikilink = wikidata.get_wikilink(html_etree, 'arwiki')
  471. self.assertEqual(wikilink, 'https://ar.wikipedia.org/wiki/Test')
  472. wikilink = wikidata.get_wikilink(html_etree, 'enwikiquote')
  473. self.assertEqual(wikilink, 'https://en.wikiquote.org/wiki/Test')