Browse Source

[enh] engine - add Tineye reverse image search

Other optional parameters:

`&sort=crawl_date`
    can be appended to search_string to sort results by date.

`&domain=example.org`
    can be appended to search_string to restrict results to a single domain.

Public instances could get timed out for 3600s relatively quickly.

--

Merged from @allendema's commit [1] and slightly modified / see [2].

Related-to: [1] https://github.com/allendema/searx/commit/455b2b4460cd830ac1f7e62e824040e2fe648de9
Related-to: [2] https://github.com/searx/searx/pull/3040
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Allen 3 years ago
parent
commit
880555e263
2 changed files with 89 additions and 0 deletions
  1. 83 0
      searx/engines/tineye.py
  2. 6 0
      searx/settings.yml

+ 83 - 0
searx/engines/tineye.py

@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Tineye - Reverse search images
+
+"""
+
+from json import loads
+from urllib.parse import urlencode
+from datetime import datetime
+
+# Descriptive metadata about the upstream service this engine queries.
+about = {
+    "website": 'https://tineye.com',
+    "wikidata_id": 'Q2382535',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+# Engine settings.
+# NOTE(review): presumably read by the searx engine loader by name -- confirm.
+categories = ['images']
+paging = True
+safesearch = False
+base_url = 'https://tineye.com'
+# Path template appended to base_url; request() fills in {page} with the page
+# number and {query} with the URL-encoded ``url=<query>`` parameter.
+search_string = '/result_json/?page={page}&{query}'
+
+
+def request(query, params):
+    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
+    params['url'] = base_url + search_string.format(query=urlencode({'url': query}), page=params['pageno'])
+
+    params['headers'].update(
+        {
+            'Connection': 'keep-alive',
+            'Accept-Encoding': 'gzip, defalte, br',
+            'Host': 'tineye.com',
+            'DNT': '1',
+            'TE': 'trailers',
+        }
+    )
+    return params
+
+
+def response(resp):
+    results = []
+
+    # Define wanted results
+    json_data = loads(resp.text)
+    number_of_results = json_data['num_matches']
+
+    for i in json_data['matches']:
+        image_format = i['format']
+        width = i['width']
+        height = i['height']
+        thumbnail_src = i['image_url']
+        backlink = i['domains'][0]['backlinks'][0]
+        url = backlink['backlink']
+        source = backlink['url']
+        title = backlink['image_name']
+        img_src = backlink['url']
+
+        # Get and convert published date
+        api_date = backlink['crawl_date'][:-3]
+        publishedDate = datetime.fromisoformat(api_date)
+
+        # Append results
+        results.append(
+            {
+                'template': 'images.html',
+                'url': url,
+                'thumbnail_src': thumbnail_src,
+                'source': source,
+                'title': title,
+                'img_src': img_src,
+                'format': image_format,
+                'widht': width,
+                'height': height,
+                'publishedDate': publishedDate,
+            }
+        )
+
+    # Append number of results
+    results.append({'number_of_results': number_of_results})
+
+    return results

+ 6 - 0
searx/settings.yml

@@ -483,6 +483,12 @@ engines:
     timeout: 3.0
     timeout: 3.0
     disabled: true
     disabled: true
 
 
+  - name: tineye
+    engine: tineye
+    shortcut: tin
+    timeout: 9.0
+    disabled: true
+
   - name: etymonline
   - name: etymonline
     engine: xpath
     engine: xpath
     paging: true
     paging: true