Browse Source

update generalfile engine and add comments

Thomas Pointhuber 10 years ago
parent
commit
c5d83059d5
2 changed files with 28 additions and 4 deletions
  1. +28 -3
      searx/engines/generalfile.py
  2. +0 -1
      searx/settings.yml

+ 28 - 3
searx/engines/generalfile.py

@@ -1,35 +1,60 @@
+## General Files (Files)
+# 
+# @website     http://www.general-files.org
+# @provide-api no (nothing found)
+# 
+# @using-api   no (because nothing found)
+# @results     HTML (using search portal)
+# @stable      no (HTML can change)
+# @parse       url, title, content
+#
+# @todo        detect torrents?
+
 from lxml import html
 from lxml import html
 
 
+# engine dependent config
+categories = ['files']
+paging = True
 
 
+# search-url
 base_url = 'http://www.general-file.com'
 base_url = 'http://www.general-file.com'
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
 
 
+# specific xpath variables
 result_xpath = '//table[@class="block-file"]'
 result_xpath = '//table[@class="block-file"]'
 title_xpath = './/h2/a//text()'
 title_xpath = './/h2/a//text()'
 url_xpath = './/h2/a/@href'
 url_xpath = './/h2/a/@href'
 content_xpath = './/p//text()'
 content_xpath = './/p//text()'
 
 
-paging = True
-
 
 
+# do search-request
 def request(query, params):
 def request(query, params):
+
     params['url'] = search_url.format(query=query,
     params['url'] = search_url.format(query=query,
                                       letter=query[0],
                                       letter=query[0],
                                       pageno=params['pageno'])
                                       pageno=params['pageno'])
+
     return params
     return params
 
 
 
 
+# get response from search-request
 def response(resp):
 def response(resp):
-
     results = []
     results = []
+
     dom = html.fromstring(resp.text)
     dom = html.fromstring(resp.text)
+
+    # parse results
     for result in dom.xpath(result_xpath):
     for result in dom.xpath(result_xpath):
         url = result.xpath(url_xpath)[0]
         url = result.xpath(url_xpath)[0]
+
         # skip fast download links
         # skip fast download links
         if not url.startswith('/'):
         if not url.startswith('/'):
             continue
             continue
+
+        # append result
         results.append({'url': base_url + url,
         results.append({'url': base_url + url,
                         'title': ''.join(result.xpath(title_xpath)),
                         'title': ''.join(result.xpath(title_xpath)),
                         'content': ''.join(result.xpath(content_xpath))})
                         'content': ''.join(result.xpath(content_xpath))})
 
 
+    # return results
     return results
     return results

+ 0 - 1
searx/settings.yml

@@ -62,7 +62,6 @@ engines:
 
 
   - name : general-file
   - name : general-file
     engine : generalfile
     engine : generalfile
-    categories : files
     shortcut : gf
     shortcut : gf
 
 
   - name : github
   - name : github