logo

searx

My custom branch(es) on searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: cf09b500f35fd1bca3fc9cc853bd7ea932220e4e
parent 5cbe4c53329a1fd2b949660fda25ff7a4ce6f254
Author: Guilhem Bonnefille <guilhem.bonnefille@gmail.com>
Date:   Sun,  3 Apr 2016 22:03:41 +0200

Add support for dokuwiki engine

Diffstat:

A searx/engines/doku.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/unit/engines/test_doku.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 169 insertions(+), 0 deletions(-)

diff --git a/searx/engines/doku.py b/searx/engines/doku.py
@@ -0,0 +1,83 @@
+# Doku Wiki
+#
+# @website     https://www.dokuwiki.org/
+# @provide-api yes
+#              (https://www.dokuwiki.org/devel:xmlrpc)
+#
+# @using-api   no
+# @results     HTML
+# @stable      yes
+# @parse       (general)    url, title, content
+
+from urllib import urlencode
+from lxml.html import fromstring
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
+paging = False
+language_support = False
+number_of_results = 5
+
+# search-url
+# Doku is OpenSearch compatible
+base_url = 'http://localhost:8090'
+search_url = '/?do=search'\
+             '&{query}'
+# TODO             '&startRecord={offset}'\
+# TODO             '&maximumRecords={limit}'\
+
+
+# do search-request
+def request(query, params):
+    """Store the DokuWiki search URL for query in params['url']; return params."""
+    # urlencode() emits the complete "id=<escaped query>" pair, so the
+    # template only supplies the "&" separator in front of it
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'id': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    """Parse a DokuWiki result page (resp.text) into title/content/url dicts."""
+    results = []
+
+    doc = fromstring(resp.text)
+
+    # parse results
+    # Quickhits
+    for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
+        links = r.xpath('.//a[@class="wikilink1"]/@href')
+        # skip entries without a wiki link (or with an empty href)
+        if not links or not links[-1]:
+            continue
+
+        res_url = links[-1]
+        title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+
+        # append result
+        results.append({'title': title,
+                        'content': "",
+                        'url': base_url + res_url})
+
+    # Search results: a <dt> carries the link, the <dd> after it the snippet
+    res_url = title = None
+    for r in doc.xpath('//dl[@class="search_results"]/*'):
+        if r.tag == "dt":
+            links = r.xpath('.//a[@class="wikilink1"]/@href')
+            # reset on every <dt> so a link-less one cannot lend the previous
+            # entry's URL and title to the <dd> that follows it
+            res_url = links[-1] if links else None
+            title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title'))
+        elif r.tag == "dd" and res_url:
+            content = extract_text(r.xpath('.'))
+
+            # append result
+            results.append({'title': title,
+                            'content': content,
+                            'url': base_url + res_url})
+
+    # return results
+    return results
diff --git a/tests/unit/engines/test_doku.py b/tests/unit/engines/test_doku.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+from collections import defaultdict
+import mock
+from searx.engines import doku
+from searx.testing import SearxTestCase
+
+
+class TestDokuEngine(SearxTestCase):
+
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        params = doku.request(query, dicto)
+        self.assertIn('url', params)
+        self.assertIn('&id=' + query, params['url'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, doku.response, None)
+        self.assertRaises(AttributeError, doku.response, [])
+        self.assertRaises(AttributeError, doku.response, '')
+        self.assertRaises(AttributeError, doku.response, '[]')
+
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(doku.response(response), [])
+
+        html = u"""
+        <div class="search_quickresult">
+            <h3>Pages trouvées :</h3>
+            <ul class="search_quickhits">
+                <li> <a href="/xfconf-query" class="wikilink1" title="xfconf-query">xfconf-query</a></li>
+            </ul>
+            <div class="clearer"></div>
+        </div>
+        """
+        response = mock.Mock(text=html)
+        expected = [{'content': '', 'title': 'xfconf-query', 'url': 'http://localhost:8090/xfconf-query'}]
+        self.assertEqual(doku.response(response), expected)
+
+        html = u"""
+        <dl class="search_results">
+        <dt><a href="/xvnc?s[]=query" class="wikilink1" title="xvnc">xvnc</a>: 40 Occurrences trouvées</dt>
+        <dd>er = /usr/bin/Xvnc
+server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 640x480 -depth 8 -Secur... er = /usr/bin/Xvnc
+server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 800x600 -depth 8 -Secur... er = /usr/bin/Xvnc
+server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1024x768 -depth 8 -Secu... er = /usr/bin/Xvnc
+server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1280x1024 -depth 8 -Sec</dd>
+        <dt><a href="/postfix_mysql_tls_sasl_1404?s[]=query" class="wikilink1" title="postfix_mysql_tls_sasl_1404">postfix_mysql_tls_sasl_1404</a>: 14 Occurrences trouvées</dt>
+        <dd>tdepasse
+hosts = 127.0.0.1
+dbname = postfix
+<strong class="search_hit">query</strong> = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse
+hosts = 127.0.0.1
+dbname = postfix
+<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s'
+#optional <strong class="search_hit">query</strong> to use when relaying for backup MX
+#<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' and backupmx =</dd><dt><a href="/tutoriel/comment_creer_un_terminal_x_ou_recycler_une_vieille_machine?s[]=query" class="wikilink1" title="tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine">tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine</a>: 13 Occurrences trouvées</dt><dd>z gdm (ubuntu) tapez sudo /etc/init.d/gdm stop
+X -<strong class="search_hit">query</strong> 192.168.1.2
+&lt;/code&gt;
+:)
+Si vous désirez, sur la mê... ans une console (tjs sur le vieil ordi)
+&lt;code&gt;
+X -<strong class="search_hit">query</strong> 192.168.1.2 :1
+&lt;/code&gt;
+Un écran de login devrait ... ure.
+&lt;note tip&gt;Rajouter "-once" à la commande "X -<strong class="search_hit">query</strong> 192.168.1.2 :1" permet de quitter la session et r... d'une ubuntu/kubuntu\\
+Testez d'abord que le //X -<strong class="search_hit">query</strong> ...// fonctionne, dans une console (CTRL-ALT-F1) </dd>
+        <dt><a href="/bind9?s[]=query" class="wikilink1" title="bind9">bind9</a>: 12 Occurrences trouvées</dt>
+        <dd> printcmd
+;; Got answer:
+;; -&gt;&gt;HEADER&lt;&lt;- opcode: <strong class="search_hit">QUERY</strong>, status: NOERROR, id: 13427
+;; flags: qr aa rd ra; <strong class="search_hit">QUERY</strong>: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1
+
+[...]
+
+;; <strong class="search_hit">Query</strong> time: 1 msec
+;; SERVER: 127.0.0.1#53(127.0.0.1)
+;... ne énorme diminution du temps mis par la requête (<strong class="search_hit">Query</strong> time) , entre la première et la deuxième requête.</dd>
+        </dl>
+        """
+        response = mock.Mock(text=html)
+        results = doku.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 4)
+        self.assertEqual(results[0]['title'], 'xvnc')
+        self.assertEqual(results[0]['url'], 'http://localhost:8090/xvnc?s[]=query')
+        self.assertEqual(results[3]['title'], 'bind9')