Browse Source

fix twitter engine and add comments

* add language-support
* add comments
* little refactoring
Thomas Pointhuber 10 years ago
parent
commit
9460750fea
1 changed file with 35 additions and 2 deletions
  1. 35 2
      searx/engines/twitter.py

+ 35 - 2
searx/engines/twitter.py

@@ -1,30 +1,63 @@
+## Twitter (Social media)
+# 
+# @website     https://twitter.com/
+# @provide-api yes (https://dev.twitter.com/docs/using-search)
+# 
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      no (HTML can change)
+# @parse       url, title, content
+#
+# @todo        publishedDate
+
 from urlparse import urljoin
 from urlparse import urljoin
 from urllib import urlencode
 from urllib import urlencode
 from lxml import html
 from lxml import html
 from cgi import escape
 from cgi import escape
 
 
+# engine dependent config
 categories = ['social media']
 categories = ['social media']
+language_support = True
 
 
+# search-url
 base_url = 'https://twitter.com/'
 base_url = 'https://twitter.com/'
 search_url = base_url+'search?'
 search_url = base_url+'search?'
+
+# specific xpath variables
+results_xpath = '//li[@data-item-type="tweet"]'
+link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
 
 
 
 
+# do search-request
 def request(query, params):
 def request(query, params):
     params['url'] = search_url + urlencode({'q': query})
     params['url'] = search_url + urlencode({'q': query})
+
+    # set language if specified
+    if params['language'] != 'all':
+        params['cookies']['lang'] = params['language'].split('_')[0]
+
     return params
     return params
 
 
 
 
+# get response from search-request
 def response(resp):
 def response(resp):
     results = []
     results = []
+
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
-    for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
+
-        link = tweet.xpath('.//small[@class="time"]//a')[0]
+    # parse results
+    for tweet in dom.xpath(results_xpath):
+        link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
         title = ''.join(tweet.xpath(title_xpath))
         content = escape(''.join(tweet.xpath(content_xpath)))
         content = escape(''.join(tweet.xpath(content_xpath)))
+
+        # append result
         results.append({'url': url,
         results.append({'url': url,
                         'title': title,
                         'title': title,
                         'content': content})
                         'content': content})
+
+    # return results
     return results
     return results