From e350b818e647a2d111c252594cbbdfad617b8538 Mon Sep 17 00:00:00 2001 From: TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> Date: Tue, 30 Jun 2026 10:36:57 +0900 Subject: modified newsboat/fulltext.py --- ar/.config/newsboat/fulltext.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'ar') diff --git a/ar/.config/newsboat/fulltext.py b/ar/.config/newsboat/fulltext.py index 4f567e5..33896b1 100755 --- a/ar/.config/newsboat/fulltext.py +++ b/ar/.config/newsboat/fulltext.py @@ -90,8 +90,11 @@ def extract(url): with open(cp, "r", encoding="utf-8", errors="replace") as f: return f.read() try: + # Force UTF-8 decoding: some sites (e.g. CNBC) serve UTF-8 but declare + # no charset in the HTTP header or an early <meta> tag, so rdrview falls back + # to Latin-1/CP1252 and mangles multibyte chars (em-dash -> "â€""). out = subprocess.run( - ["rdrview", "-H", url], + ["rdrview", "-E", "UTF-8", "-H", url], capture_output=True, text=True, timeout=TIMEOUT, ) except (FileNotFoundError, subprocess.TimeoutExpired): -- cgit v1.2.3