3 years ago · 39d0156f38
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@@ -17,6 +17,7 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
 
				 
			
 
				 from urllib.parse import urlencode
			
 
				 from datetime import datetime
			
 
				+from flask_babel import gettext
			
 
				 
			
 
				 about = {
			
 
				     "website": 'https://tineye.com',
			
@@ -28,20 +29,41 @@ about = {
 
				 }
			
 
				 
			
 
				 engine_type = 'online_url_search'
			
 
				+""":py:obj:`searx.search.processors.online_url_search`"""
			
 
				+
			
 
				 categories = ['general']
			
 
				 paging = True
			
 
				 safesearch = False
			
 
				 base_url = 'https://tineye.com'
			
 
				 search_string = '/result_json/?page={page}&{query}'
			
 
				 
			
 
				+FORMAT_NOT_SUPPORTED = gettext(
			
 
				+    "Could not read that image url. This may be due to an unsupported file"
			
 
				+    " format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
			
 
				+)
			
 
				+"""TinEye error message"""
			
 
				+
			
 
				+NO_SIGNATURE_ERROR = gettext(
			
 
				+    "The image is too simple to find matches. TinEye requires a basic level of"
			
 
				+    " visual detail to successfully identify matches."
			
 
				+)
			
 
				+"""TinEye error message"""
			
 
				+
			
 
				+DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
			
 
				+"""TinEye error message"""
			
 
				+
			
 
				 
			
 
				 def request(query, params):
			
 
				+    """Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`."""
			
 
				+
			
 
				+    params['raise_for_httperror'] = False
			
 
				 
			
 
				     if params['search_urls']['data:image']:
			
 
				         query = params['search_urls']['data:image']
			
 
				     elif params['search_urls']['http']:
			
 
				         query = params['search_urls']['http']
			
 
				 
			
 
				+    logger.debug("query URL: %s", query)
			
 
				     query = urlencode({'url': query})
			
 
				 
			
 
				     # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
			
@@ -59,45 +81,145 @@ def request(query, params):
 
				     return params
			
 
				 
			
 
				 
			
 
				+def parse_tineye_match(match_json):
			
 
				+    """Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
			
 
				+    object.
			
 
				+
			
 
				+    Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__
			
 
				+
			
 
				+    - `image_url`, link to the result image.
			
 
				+    - `domain`, domain this result was found on.
			
 
				+    - `score`, a number (0 to 100) that indicates how closely the images match.
			
 
				+    - `width`, image width in pixels.
			
 
				+    - `height`, image height in pixels.
			
 
				+    - `size`, image area in pixels.
			
 
				+    - `format`, image format.
			
 
				+    - `filesize`, image size in bytes.
			
 
				+    - `overlay`, overlay URL.
			
 
				+    - `tags`, whether this match belongs to a collection or stock domain.
			
 
				+
			
 
				+    - `backlinks`, a list of Backlink objects pointing to the original websites
			
 
				+      and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
			
 
				+      <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):
			
 
				+
			
 
				+      - `url`, the image URL to the image.
			
 
				+      - `backlink`, the original website URL.
			
 
				+      - `crawl_date`, the date the image was crawled.
			
 
				+
			
 
				+    """
			
 
				+
			
 
				+    # HINT: there exists an alternative backlink dict in the domains list / e.g.::
			
 
				+    #
			
 
				+    #     match_json['domains'][0]['backlinks']
			
 
				+
			
 
				+    backlinks = []
			
 
				+    if "backlinks" in match_json:
			
 
				+
			
 
				+        for backlink_json in match_json["backlinks"]:
			
 
				+            if not isinstance(backlink_json, dict):
			
 
				+                continue
			
 
				+
			
 
				+            crawl_date = backlink_json.get("crawl_date")
			
 
				+            if crawl_date:
			
 
				+                crawl_date = datetime.fromisoformat(crawl_date[:-3])
			
 
				+            else:
			
 
				+                crawl_date = datetime.min
			
 
				+
			
 
				+            backlinks.append(
			
 
				+                {
			
 
				+                    'url': backlink_json.get("url"),
			
 
				+                    'backlink': backlink_json.get("backlink"),
			
 
				+                    'crawl_date': crawl_date,
			
 
				+                    'image_name': backlink_json.get("image_name"),
			
 
				+                }
			
 
				+            )
			
 
				+
			
 
				+    return {
			
 
				+        'image_url': match_json.get("image_url"),
			
 
				+        'domain': match_json.get("domain"),
			
 
				+        'score': match_json.get("score"),
			
 
				+        'width': match_json.get("width"),
			
 
				+        'height': match_json.get("height"),
			
 
				+        'size': match_json.get("size"),
			
 
				+        'image_format': match_json.get("format"),
			
 
				+        'filesize': match_json.get("filesize"),
			
 
				+        'overlay': match_json.get("overlay"),
			
 
				+        'tags': match_json.get("tags"),
			
 
				+        'backlinks': backlinks,
			
 
				+    }
			
 
				+
			
 
				+
			
 
				 def response(resp):
			
 
				+    """Parse HTTP response from TinEye."""
			
 
				     results = []
			
 
				 
			
 
				-    # Define wanted results
			
 
				-    json_data = resp.json()
			
 
				-    number_of_results = json_data['num_matches']
			
 
				-
			
 
				-    for i in json_data['matches']:
			
 
				-        image_format = i['format']
			
 
				-        width = i['width']
			
 
				-        height = i['height']
			
 
				-        thumbnail_src = i['image_url']
			
 
				-        backlink = i['domains'][0]['backlinks'][0]
			
 
				-        url = backlink['backlink']
			
 
				-        source = backlink['url']
			
 
				-        title = backlink['image_name']
			
 
				-        img_src = backlink['url']
			
 
				-
			
 
				-        # Get and convert published date
			
 
				-        api_date = backlink['crawl_date'][:-3]
			
 
				-        publishedDate = datetime.fromisoformat(api_date)
			
 
				-
			
 
				-        # Append results
			
 
				+    try:
			
 
				+        json_data = resp.json()
			
 
				+    except Exception as exc:  # pylint: disable=broad-except
			
 
				+        msg = "can't parse JSON response // %s" % exc
			
 
				+        logger.error(msg)
			
 
				+        json_data = {'error': msg}
			
 
				+
			
 
				+    # handle error codes from Tineye
			
 
				+
			
 
				+    if resp.is_error:
			
 
				+        if resp.status_code in (400, 422):
			
 
				+
			
 
				+            message = 'HTTP status: %s' % resp.status_code
			
 
				+            error = json_data.get('error')
			
 
				+            s_key = json_data.get('suggestions', {}).get('key', '')
			
 
				+
			
 
				+            if error and s_key:
			
 
				+                message = "%s (%s)" % (error, s_key)
			
 
				+            elif error:
			
 
				+                message = error
			
 
				+
			
 
				+            if s_key == "Invalid image URL":
			
 
				+                # test https://docs.searxng.org/_static/searxng-wordmark.svg
			
 
				+                message = FORMAT_NOT_SUPPORTED
			
 
				+            elif s_key == 'NO_SIGNATURE_ERROR':
			
 
				+                # test https://pngimg.com/uploads/dot/dot_PNG4.png
			
 
				+                message = NO_SIGNATURE_ERROR
			
 
				+            elif s_key == 'Download Error':
			
 
				+                # test https://notexists
			
 
				+                message = DOWNLOAD_ERROR
			
 
				+
			
 
				+            # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
			
 
				+            # results.append({'answer': message})
			
 
				+            logger.error(message)
			
 
				+
			
 
				+            return results
			
 
				+
			
 
				+        resp.raise_for_status()
			
 
				+
			
 
				+    # append results from matches
			
 
				+
			
 
				+    for match_json in json_data['matches']:
			
 
				+
			
 
				+        tineye_match = parse_tineye_match(match_json)
			
 
				+        if not tineye_match['backlinks']:
			
 
				+            continue
			
 
				+
			
 
				+        backlink = tineye_match['backlinks'][0]
			
 
				         results.append(
			
 
				             {
			
 
				                 'template': 'images.html',
			
 
				-                'url': url,
			
 
				-                'thumbnail_src': thumbnail_src,
			
 
				-                'source': source,
			
 
				-                'title': title,
			
 
				-                'img_src': img_src,
			
 
				-                'format': image_format,
			
 
				-                'widht': width,
			
 
				-                'height': height,
			
 
				-                'publishedDate': publishedDate,
			
 
				+                'url': backlink['backlink'],
			
 
				+                'thumbnail_src': tineye_match['image_url'],
			
 
				+                'source': backlink['url'],
			
 
				+                'title': backlink['image_name'],
			
 
				+                'img_src': backlink['url'],
			
 
				+                'format': tineye_match['image_format'],
			
 
				+                'widht': tineye_match['width'],
			
 
				+                'height': tineye_match['height'],
			
 
				+                'publishedDate': backlink['crawl_date'],
			
 
				             }
			
 
				         )
			
 
				 
			
 
				-    # Append number of results
			
 
				-    results.append({'number_of_results': number_of_results})
			
 
				+    # append number of results
			
 
				+
			
 
				+    number_of_results = json_data.get('num_matches')
			
 
				+    if number_of_results:
			
 
				+        results.append({'number_of_results': number_of_results})
			
 
				 
			
 
				     return results