scanr_structures.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. """
  2. ScanR Structures (Science)
  3. @website https://scanr.enseignementsup-recherche.gouv.fr
  4. @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html)
  5. @using-api yes
  6. @results JSON
  7. @stable yes
  8. @parse url, title, content, img_src
  9. """
  10. from urllib import urlencode
  11. from json import loads, dumps
  12. from dateutil import parser
  13. from searx.utils import html_to_text
  14. # engine dependent config
  15. categories = ['science']
  16. paging = True
  17. page_size = 20
  18. # search-url
  19. url = 'https://scanr.enseignementsup-recherche.gouv.fr/'
  20. search_url = url + 'api/structures/search'
  21. # do search-request
  22. def request(query, params):
  23. params['url'] = search_url
  24. params['method'] = 'POST'
  25. params['headers']['Content-type'] = "application/json"
  26. params['data'] = dumps({"query": query,
  27. "searchField": "ALL",
  28. "sortDirection": "ASC",
  29. "sortOrder": "RELEVANCY",
  30. "page": params['pageno'],
  31. "pageSize": page_size})
  32. return params
  33. # get response from search-request
  34. def response(resp):
  35. results = []
  36. search_res = loads(resp.text)
  37. # return empty array if there are no results
  38. if search_res.get('total') < 1:
  39. return []
  40. # parse results
  41. for result in search_res['results']:
  42. if 'id' not in result:
  43. continue
  44. # is it thumbnail or img_src??
  45. thumbnail = None
  46. if 'logo' in result:
  47. thumbnail = result['logo']
  48. if thumbnail[0] == '/':
  49. thumbnail = url + thumbnail
  50. content = None
  51. if 'highlights' in result:
  52. content = result['highlights'][0]['value']
  53. # append result
  54. results.append({'url': url + 'structure/' + result['id'],
  55. 'title': result['label'],
  56. # 'thumbnail': thumbnail,
  57. 'img_src': thumbnail,
  58. 'content': html_to_text(content)})
  59. # return results
  60. return results