|
@@ -9,6 +9,7 @@
|
|
|
|
|
|
from urllib.parse import urlencode
|
|
from urllib.parse import urlencode
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
|
+from html import unescape
|
|
import time
|
|
import time
|
|
import json
|
|
import json
|
|
|
|
|
|
@@ -119,11 +120,15 @@ def parse_general(data):
|
|
except (ValueError, TypeError):
|
|
except (ValueError, TypeError):
|
|
published_date = None
|
|
published_date = None
|
|
|
|
|
|
|
|
+ # title and content sometimes contain HTML-escaped characters (such as & ' "), so unescape them
|
|
|
|
+ title = unescape(entry["title"])
|
|
|
|
+ content = unescape(entry.get("abs", ""))
|
|
|
|
+
|
|
results.append(
|
|
results.append(
|
|
{
|
|
{
|
|
- "title": entry["title"],
|
|
|
|
|
|
+ "title": title,
|
|
"url": entry["url"],
|
|
"url": entry["url"],
|
|
- "content": entry.get("abs", ""),
|
|
|
|
|
|
+ "content": content,
|
|
"publishedDate": published_date,
|
|
"publishedDate": published_date,
|
|
}
|
|
}
|
|
)
|
|
)
|