|
@@ -25,7 +25,6 @@ from urlparse import urlparse
|
|
from searx import settings
|
|
from searx import settings
|
|
import ConfigParser
|
|
import ConfigParser
|
|
import sys
|
|
import sys
|
|
-import re
|
|
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
|
|
|
engine_dir = dirname(realpath(__file__))
|
|
engine_dir = dirname(realpath(__file__))
|
|
@@ -106,31 +105,6 @@ def make_callback(engine_name, results, suggestions, callback, params):
|
|
results[engine_name] = cb_res
|
|
results[engine_name] = cb_res
|
|
return process_callback
|
|
return process_callback
|
|
|
|
|
|
def highlight_content(content, query):
    """Wrap occurrences of the search query inside ``content`` in ``<b>`` tags.

    The whole query is highlighted when it occurs verbatim
    (case-insensitively) in ``content``; otherwise every
    whitespace-separated chunk of the query is highlighted on its own.

    :param content: result snippet text; falsy values yield ``None``.
    :param query: the search query, either a UTF-8 encoded byte string or
        already-decoded text.
    :returns: ``None`` for empty content, the untouched content when it
        looks like HTML or when the query is blank, otherwise the content
        with the matches wrapped in ``<b>...</b>``.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import re`` being present.
    import re

    if not content:
        return None

    # ignoring html contents
    # TODO better html content detection
    if content.find('<') != -1:
        return content

    # Only byte strings need decoding; calling ``decode`` on text that is
    # already decoded fails for non-ASCII queries.
    if isinstance(query, bytes):
        query = query.decode('utf-8')

    # A blank query would produce the empty pattern ``()``, which matches at
    # every position and litters the content with ``<b></b>`` pairs.
    if not query.strip():
        return content

    if content.lower().find(query.lower()) > -1:
        # The full query appears verbatim: highlight it as one unit.
        query_regex = u'({0})'.format(re.escape(query))
    else:
        regex_parts = []
        for chunk in query.split():
            if len(chunk) == 1:
                # Single characters only count when delimited by non-word
                # characters, so they do not match inside other words.
                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
            else:
                regex_parts.append(u'{0}'.format(re.escape(chunk)))
        query_regex = u'({0})'.format(u'|'.join(regex_parts))

    return re.sub(query_regex, '<b>\\1</b>', content, flags=re.I | re.U)
|
|
|
|
-
|
|
|
|
def score_results(results):
|
|
def score_results(results):
|
|
flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
|
|
flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
|
|
flat_len = len(flat_res)
|
|
flat_len = len(flat_res)
|
|
@@ -218,8 +192,6 @@ def search(query, request, selected_engines):
|
|
results = score_results(results)
|
|
results = score_results(results)
|
|
|
|
|
|
for result in results:
|
|
for result in results:
|
|
- if 'content' in result:
|
|
|
|
- result['content'] = highlight_content(result['content'], query)
|
|
|
|
for res_engine in result['engines']:
|
|
for res_engine in result['engines']:
|
|
engines[result['engine']].stats['score_count'] += result['score']
|
|
engines[result['engine']].stats['score_count'] += result['score']
|
|
|
|
|