Browse Source

[enh] csv output support

asciimoo 11 years ago
parent
commit
a192438e9a
2 changed files with 46 additions and 0 deletions
  1. 33 0
      searx/utils.py
  2. 13 0
      searx/webapp.py

+ 33 - 0
searx/utils.py

@@ -1,5 +1,8 @@
 from HTMLParser import HTMLParser
 import htmlentitydefs
+import csv
+import codecs
+import cStringIO
 
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
@@ -24,3 +27,33 @@ def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)
     return s.get_text()
+
+
+class UnicodeWriter:
+    """
+    A CSV writer which will write rows to CSV file "f",
+    which is encoded in the given encoding.
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        # Rows are first rendered by csv.writer into this in-memory buffer
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)  # extra kwds are forwarded to csv.writer
+        self.stream = f  # destination object; only its write() method is used by this class
+        self.encoder = codecs.getincrementalencoder(encoding)()  # incremental encoder for the target encoding
+
+    def writerow(self, row):
+        self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])  # str/unicode cells -> stripped UTF-8 bytes, other values -> str(s); NOTE(review): a non-ASCII byte str may fail inside .encode() on Python 2 - confirm inputs
+        # Fetch the UTF-8 bytes that csv.writer just produced for this row ...
+        data = self.queue.getvalue()
+        data = data.decode("utf-8")  # back to unicode so it can be re-encoded below
+        # ... and re-encode them into the encoding chosen in __init__
+        data = self.encoder.encode(data)
+        # hand the encoded row to the target stream
+        self.stream.write(data)
+        # clear the buffer so the next row does not repeat this one
+        self.queue.truncate(0)
+
+    def writerows(self, rows):  # writes each row in iteration order through writerow()
+        for row in rows:
+            self.writerow(row)

+ 13 - 0
searx/webapp.py

@@ -26,6 +26,8 @@ from flask import Flask, request, render_template, url_for, Response, make_respo
 from searx.engines import search, categories, engines, get_engines_stats
 from searx import settings
 import json
+import cStringIO
+from searx.utils import UnicodeWriter
 
 
 app = Flask(__name__)
@@ -104,6 +106,17 @@ def index():
              result['pretty_url'] = result['url']
     if request_data.get('format') == 'json':
         return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
+    elif request_data.get('format') == 'csv':
+        csv = UnicodeWriter(cStringIO.StringIO())
+        if len(results):
+            keys = results[0].keys()
+            csv.writerow(keys)
+            for row in results:
+                csv.writerow([row[key] for key in keys])
+        csv.stream.seek(0)
+        response = Response(csv.stream.read(), mimetype='application/csv', )
+        response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format(query))
+        return response
     template = render('results.html'
                         ,results=results
                         ,q=request_data['q']