# SPDX-License-Identifier: AGPL-3.0-or-later
"""YaCy_ is a free distributed search engine, built on the principles of
peer-to-peer (P2P) networks.

API: Dev:APIyacysearch_

Releases:

- https://github.com/yacy/yacy_search_server/tags
- https://download.yacy.net/

.. _YaCy: https://yacy.net/
.. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch

Configuration
=============

The engine has the following (additional) settings:

- :py:obj:`http_digest_auth_user`
- :py:obj:`http_digest_auth_pass`
- :py:obj:`search_mode`
- :py:obj:`search_type`

.. code:: yaml

  - name: yacy
    engine: yacy
    categories: general
    search_type: text
    base_url: https://yacy.searchlab.eu
    shortcut: ya

  - name: yacy images
    engine: yacy
    categories: images
    search_type: image
    base_url: https://yacy.searchlab.eu
    shortcut: yai
    disabled: true

Implementations
===============

"""
  34. # pylint: disable=fixme
  35. from json import loads
  36. from urllib.parse import urlencode
  37. from dateutil import parser
  38. from httpx import DigestAuth
  39. from searx.utils import html_to_text
# about — engine metadata shown in the SearXNG preferences / stats pages
about = {
    "website": 'https://yacy.net/',
    "wikidata_id": 'Q1759675',
    "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
# engine dependent config
categories = ['general']
paging = True
number_of_results = 10  # results requested per page (maximumRecords)

http_digest_auth_user = ""
"""HTTP digest user for the local YACY instance"""

http_digest_auth_pass = ""
"""HTTP digest password for the local YACY instance"""

search_mode = 'global'
"""Yacy search mode ``global`` or ``local``. By default, Yacy operates in ``global``
mode.

``global``
  Peer-to-Peer search

``local``
  Privacy or Stealth mode, restricts the search to local yacy instance.
"""

search_type = 'text'
"""One of ``text``, ``image`` / The search-types ``app``, ``audio`` and
``video`` are not yet implemented (Pull-Requests are welcome).
"""

# search-url template; placeholders are filled in request()
base_url = 'https://yacy.searchlab.eu'
search_url = (
    '/yacysearch.json?{query}'
    '&startRecord={offset}'
    '&maximumRecords={limit}'
    '&contentdom={search_type}'
    '&resource={resource}'
)
  78. def init(_):
  79. valid_types = [
  80. 'text',
  81. 'image',
  82. # 'app', 'audio', 'video',
  83. ]
  84. if search_type not in valid_types:
  85. raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types))
  86. def request(query, params):
  87. offset = (params['pageno'] - 1) * number_of_results
  88. params['url'] = base_url + search_url.format(
  89. query=urlencode({'query': query}),
  90. offset=offset,
  91. limit=number_of_results,
  92. search_type=search_type,
  93. resource=search_mode,
  94. )
  95. if http_digest_auth_user and http_digest_auth_pass:
  96. params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
  97. # add language tag if specified
  98. if params['language'] != 'all':
  99. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  100. return params
  101. def response(resp):
  102. results = []
  103. raw_search_results = loads(resp.text)
  104. # return empty array if there are no results
  105. if not raw_search_results:
  106. return []
  107. search_results = raw_search_results.get('channels', [])
  108. if len(search_results) == 0:
  109. return []
  110. for result in search_results[0].get('items', []):
  111. # parse image results
  112. if search_type == 'image':
  113. result_url = ''
  114. if 'url' in result:
  115. result_url = result['url']
  116. elif 'link' in result:
  117. result_url = result['link']
  118. else:
  119. continue
  120. # append result
  121. results.append(
  122. {
  123. 'url': result_url,
  124. 'title': result['title'],
  125. 'content': '',
  126. 'img_src': result['image'],
  127. 'template': 'images.html',
  128. }
  129. )
  130. # parse general results
  131. else:
  132. publishedDate = None
  133. if 'pubDate' in result:
  134. publishedDate = parser.parse(result['pubDate'])
  135. # append result
  136. results.append(
  137. {
  138. 'url': result['link'] or '',
  139. 'title': result['title'],
  140. 'content': html_to_text(result['description']),
  141. 'publishedDate': publishedDate,
  142. }
  143. )
  144. # TODO parse video, audio and file results
  145. return results