Browse Source

fix stackoverflow and add comments

Thomas Pointhuber 10 years ago
parent
commit
a46bbb4042
2 changed files with 35 additions and 8 deletions
  1. 35 7
      searx/engines/stackoverflow.py
  2. 0 1
      searx/settings.yml

+ 35 - 7
searx/engines/stackoverflow.py

@@ -1,30 +1,58 @@
+## Stack Overflow (IT)
+# 
+# @website     https://stackoverflow.com/
+# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
+# 
+# @using-api   no
+# @results     HTML
+# @stable      no (HTML can change)
+# @parse       url, title, content
+
 from urlparse import urljoin
 from urlparse import urljoin
 from cgi import escape
 from cgi import escape
 from urllib import urlencode
 from urllib import urlencode
 from lxml import html
 from lxml import html
 
 
+# engine dependent config
 categories = ['it']
 categories = ['it']
+paging = True
 
 
+# search-url
 url = 'http://stackoverflow.com/'
 url = 'http://stackoverflow.com/'
 search_url = url+'search?{query}&page={pageno}'
 search_url = url+'search?{query}&page={pageno}'
-result_xpath = './/div[@class="excerpt"]//text()'
 
 
-paging = True
+# specific xpath variables
+results_xpath = '//div[contains(@class,"question-summary")]'
+link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
+title_xpath = './/text()'
+content_xpath = './/div[@class="excerpt"]//text()'
 
 
 
 
+# do search-request
 def request(query, params):
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       pageno=params['pageno'])
                                       pageno=params['pageno'])
+
     return params
     return params
 
 
 
 
+# get response from search-request
 def response(resp):
 def response(resp):
     results = []
     results = []
+
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@class="question-summary search-result"]'):
-        link = result.xpath('.//div[@class="result-link"]//a')[0]
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(' '.join(link.xpath('.//text()')))
-        content = escape(' '.join(result.xpath(result_xpath)))
-        results.append({'url': href, 'title': title, 'content': content})
+        title = escape(' '.join(link.xpath(title_xpath)))
+        content = escape(' '.join(result.xpath(content_xpath)))
+
+        # append result
+        results.append({'url': href, 
+                        'title': title, 
+                        'content': content})
+
+    # return results
     return results
     return results

+ 0 - 1
searx/settings.yml

@@ -90,7 +90,6 @@ engines:
 
 
   - name : stackoverflow
   - name : stackoverflow
     engine : stackoverflow
     engine : stackoverflow
-    categories : it
     shortcut : st
     shortcut : st
 
 
   - name : startpage
   - name : startpage