diff options
| author | TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> | 2026-06-30 10:36:57 +0900 |
|---|---|---|
| committer | TheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com> | 2026-06-30 10:36:57 +0900 |
| commit | e350b818e647a2d111c252594cbbdfad617b8538 (patch) | |
| tree | eaa683081c3cafa269407151ca42bb87b0c66758 | |
| parent | 0a2f2d475bfeb921c7edf4df03f645f94828fdbb (diff) | |
modified newsboat/fulltext.py
| -rwxr-xr-x | ar/.config/newsboat/fulltext.py | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/ar/.config/newsboat/fulltext.py b/ar/.config/newsboat/fulltext.py
index 4f567e5..33896b1 100755
--- a/ar/.config/newsboat/fulltext.py
+++ b/ar/.config/newsboat/fulltext.py
@@ -90,8 +90,11 @@ def extract(url):
         with open(cp, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     try:
+        # Force UTF-8 decoding: some sites (e.g. CNBC) serve UTF-8 but declare
+        # no charset in the HTTP header or an early <meta>, so rdrview falls back
+        # to Latin-1/CP1252 and mangles multibyte chars (em-dash -> "â€"").
         out = subprocess.run(
-            ["rdrview", "-H", url],
+            ["rdrview", "-E", "UTF-8", "-H", url],
             capture_output=True, text=True, timeout=TIMEOUT,
         )
     except (FileNotFoundError, subprocess.TimeoutExpired):
```
