Browse Source

[fix] fetch google's supported languages

Marc Abonce Seguin 6 years ago
parent
commit
0169b63e84
3 changed files with 17635 additions and 17230 deletions
  1. + 17625 - 17204
      searx/data/engines_languages.json
  2. + 3 - 3
      searx/engines/google.py
  3. + 7 - 23
      tests/unit/engines/test_google.py

File diff suppressed because it is too large
+ 17625 - 17204
searx/data/engines_languages.json


+ 3 - 3
searx/engines/google.py

@@ -381,10 +381,10 @@ def attributes_to_html(attributes):
 def _fetch_supported_languages(resp):
     supported_languages = {}
     dom = html.fromstring(resp.text)
-    options = dom.xpath('//table//td/font/label/span')
+    options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
     for option in options:
-        code = option.xpath('./@id')[0][1:]
-        name = option.text.title()
+        code = option.xpath('./@value')[0].split('_')[-1]
+        name = option.xpath('./@data-name')[0].title()
         supported_languages[code] = {"name": name}
 
     return supported_languages

+ 7 - 23
tests/unit/engines/test_google.py

@@ -205,29 +205,13 @@ class TestGoogleEngine(SearxTestCase):
         html = u"""
         <html>
             <body>
-                <table>
-                    <tbody>
-                        <tr>
-                            <td>
-                                <font>
-                                    <label>
-                                        <span id="ten">English</span>
-                                    </label>
-                                </font>
-                            </td>
-                            <td>
-                                <font>
-                                    <label>
-                                        <span id="tzh-CN">中文 (简体)</span>
-                                    </label>
-                                    <label>
-                                        <span id="tzh-TW">中文 (繁體)</span>
-                                    </label>
-                                </font>
-                            </td>
-                        </tr>
-                    </tbody>
-                </table>
+                <div id="langSec">
+                    <div>
+                        <input name="lr" data-name="english" value="lang_en" />
+                        <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" />
+                        <input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" />
+                    </div>
+                </div>
             </body>
         </html>
         """

Some files were not shown because too many files changed in this diff