yacy.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """YaCy_ is a free distributed search engine, built on the principles of
  4. peer-to-peer (P2P) networks.
  5. API: Dev:APIyacysearch_
  6. Releases:
  7. - https://github.com/yacy/yacy_search_server/tags
  8. - https://download.yacy.net/
9. .. _YaCy: https://yacy.net/
  10. .. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch
  11. Configuration
  12. =============
  13. The engine has the following (additional) settings:
  14. - :py:obj:`http_digest_auth_user`
  15. - :py:obj:`http_digest_auth_pass`
  16. - :py:obj:`search_mode`
  17. - :py:obj:`search_type`
  18. .. code:: yaml
  19. - name: yacy
  20. engine: yacy
  21. categories: general
  22. search_type: text
  23. base_url: https://yacy.searchlab.eu
  24. shortcut: ya
  25. - name: yacy images
  26. engine: yacy
  27. categories: images
  28. search_type: image
  29. base_url: https://yacy.searchlab.eu
  30. shortcut: yai
  31. disabled: true
  32. Implementations
  33. ===============
  34. """
  35. # pylint: disable=fixme
  36. from json import loads
  37. from urllib.parse import urlencode
  38. from dateutil import parser
  39. from httpx import DigestAuth
  40. from searx.utils import html_to_text
  41. # about
  42. about = {
  43. "website": 'https://yacy.net/',
  44. "wikidata_id": 'Q1759675',
  45. "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
  46. "use_official_api": True,
  47. "require_api_key": False,
  48. "results": 'JSON',
  49. }
  50. # engine dependent config
  51. categories = ['general']
  52. paging = True
  53. number_of_results = 10
  54. http_digest_auth_user = ""
  55. """HTTP digest user for the local YACY instance"""
  56. http_digest_auth_pass = ""
  57. """HTTP digest password for the local YACY instance"""
  58. search_mode = 'global'
  59. """Yacy search mode ``global`` or ``local``. By default, Yacy operates in ``global``
  60. mode.
  61. ``global``
  62. Peer-to-Peer search
  63. ``local``
  64. Privacy or Stealth mode, restricts the search to local yacy instance.
  65. """
  66. search_type = 'text'
  67. """One of ``text``, ``image`` / The search-types ``app``, ``audio`` and
  68. ``video`` are not yet implemented (Pull-Requests are welcome).
  69. """
  70. # search-url
  71. base_url = 'https://yacy.searchlab.eu'
  72. search_url = (
  73. '/yacysearch.json?{query}'
  74. '&startRecord={offset}'
  75. '&maximumRecords={limit}'
  76. '&contentdom={search_type}'
  77. '&resource={resource}'
  78. )
  79. def init(_):
  80. valid_types = [
  81. 'text',
  82. 'image',
  83. # 'app', 'audio', 'video',
  84. ]
  85. if search_type not in valid_types:
  86. raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types))
  87. def request(query, params):
  88. offset = (params['pageno'] - 1) * number_of_results
  89. params['url'] = base_url + search_url.format(
  90. query=urlencode({'query': query}),
  91. offset=offset,
  92. limit=number_of_results,
  93. search_type=search_type,
  94. resource=search_mode,
  95. )
  96. if http_digest_auth_user and http_digest_auth_pass:
  97. params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
  98. # add language tag if specified
  99. if params['language'] != 'all':
  100. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  101. return params
  102. def response(resp):
  103. results = []
  104. raw_search_results = loads(resp.text)
  105. # return empty array if there are no results
  106. if not raw_search_results:
  107. return []
  108. search_results = raw_search_results.get('channels', [])
  109. if len(search_results) == 0:
  110. return []
  111. for result in search_results[0].get('items', []):
  112. # parse image results
  113. if search_type == 'image':
  114. result_url = ''
  115. if 'url' in result:
  116. result_url = result['url']
  117. elif 'link' in result:
  118. result_url = result['link']
  119. else:
  120. continue
  121. # append result
  122. results.append(
  123. {
  124. 'url': result_url,
  125. 'title': result['title'],
  126. 'content': '',
  127. 'img_src': result['image'],
  128. 'template': 'images.html',
  129. }
  130. )
  131. # parse general results
  132. else:
  133. publishedDate = None
  134. if 'pubDate' in result:
  135. publishedDate = parser.parse(result['pubDate'])
  136. # append result
  137. results.append(
  138. {
  139. 'url': result['link'] or '',
  140. 'title': result['title'],
  141. 'content': html_to_text(result['description']),
  142. 'publishedDate': publishedDate,
  143. }
  144. )
  145. # TODO parse video, audio and file results
  146. return results