summary refs log tree commit diff
diff options
context:
space:
mode:
author TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> 2026-06-30 10:36:57 +0900
committer TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> 2026-06-30 10:36:57 +0900
commit e350b818e647a2d111c252594cbbdfad617b8538 (patch)
tree eaa683081c3cafa269407151ca42bb87b0c66758
parent 0a2f2d475bfeb921c7edf4df03f645f94828fdbb (diff)
modified newsboat/fulltext.py
-rwxr-xr-x ar/.config/newsboat/fulltext.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/ar/.config/newsboat/fulltext.py b/ar/.config/newsboat/fulltext.py
index 4f567e5..33896b1 100755
--- a/ar/.config/newsboat/fulltext.py
+++ b/ar/.config/newsboat/fulltext.py
@@ -90,8 +90,11 @@ def extract(url):
with open(cp, "r", encoding="utf-8", errors="replace") as f:
return f.read()
try:
+ # Force UTF-8 decoding: some sites (e.g. CNBC) serve UTF-8 but declare
+ # no charset in the HTTP header or an early <meta>, so rdrview falls back
+ # to Latin-1/CP1252 and mangles multibyte chars (em-dash -> "â€"").
out = subprocess.run(
- ["rdrview", "-H", url],
+ ["rdrview", "-E", "UTF-8", "-H", url],
capture_output=True, text=True, timeout=TIMEOUT,
)
except (FileNotFoundError, subprocess.TimeoutExpired):