solr.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """.. sidebar:: info
  4. - :origin:`solr.py <searx/engines/solr.py>`
  5. - `Solr <https://solr.apache.org>`_
  6. - `Solr Resources <https://solr.apache.org/resources.html>`_
  7. - `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_
  8. Solr_ is a popular search engine based on Lucene, just like Elasticsearch_. But
  9. instead of searching in indices, you can search in collections.
  10. Example
  11. =======
  12. This is an example configuration for searching in the collection
  13. ``my-collection`` and getting the results in ascending order.
  14. .. code:: yaml
  15. - name: solr
  16. engine: solr
  17. shortcut: slr
  18. base_url: http://localhost:8983
  19. collection: my-collection
  20. sort: asc
  21. enable_http: true
  22. """
  23. # pylint: disable=global-statement
  24. from json import loads
  25. from urllib.parse import urlencode
  26. from searx.exceptions import SearxEngineAPIException
# Engine settings.  init() and request() below read these module-level values.

# Root URL of the Solr server; init() appends '/solr/<collection>/select' to it.
base_url = 'http://localhost:8983'
# Name of the collection to query; init() raises ValueError while this is ''.
collection = ''
# Sent as the 'rows' parameter and used as the multiplier for the 'start' offset.
rows = 10
# Sent as the 'sort' parameter unless it is ''.
sort = ''  # sorting: asc or desc
# Sent as the 'fl' parameter unless it is ''.
field_list = 'name'  # list of field names to display on the UI
# Sent as the 'df' parameter unless it is ''.
default_fields = ''  # default field to query
# Sent as the 'qf' parameter unless it is ''.
query_fields = ''  # query fields
# URL template with a '{params}' placeholder; assigned by init().
_search_url = ''
# NOTE(review): presumably read by the searx framework to enable the 'pageno'
# handling in request() -- confirm against the engine loader.
paging = True
  36. def init(_):
  37. if collection == '':
  38. raise ValueError('collection cannot be empty')
  39. global _search_url
  40. _search_url = base_url + '/solr/' + collection + '/select?{params}'
  41. def request(query, params):
  42. query_params = {'q': query, 'rows': rows}
  43. if field_list != '':
  44. query_params['fl'] = field_list
  45. if query_fields != '':
  46. query_params['qf'] = query_fields
  47. if default_fields != '':
  48. query_params['df'] = default_fields
  49. if sort != '':
  50. query_params['sort'] = sort
  51. if 'pageno' in params:
  52. query_params['start'] = rows * (params['pageno'] - 1)
  53. params['url'] = _search_url.format(params=urlencode(query_params))
  54. return params
  55. def response(resp):
  56. resp_json = __get_response(resp)
  57. results = []
  58. for result in resp_json['response']['docs']:
  59. r = {key: str(value) for key, value in result.items()}
  60. if len(r) == 0:
  61. continue
  62. r['template'] = 'key-value.html'
  63. results.append(r)
  64. return results
  65. def __get_response(resp):
  66. try:
  67. resp_json = loads(resp.text)
  68. except Exception as e:
  69. raise SearxEngineAPIException("failed to parse response") from e
  70. if 'error' in resp_json:
  71. raise SearxEngineAPIException(resp_json['error']['msg'])
  72. return resp_json