summaryrefslogtreecommitdiff
path: root/content/rss-full-text.md
diff options
context:
space:
mode:
Diffstat (limited to 'content/rss-full-text.md')
-rw-r--r--content/rss-full-text.md204
1 files changed, 204 insertions, 0 deletions
diff --git a/content/rss-full-text.md b/content/rss-full-text.md
new file mode 100644
index 0000000..995797d
--- /dev/null
+++ b/content/rss-full-text.md
@@ -0,0 +1,204 @@
+---
+title: "RSS Full Text"
+date: 2025-07-01
+tags: ["service"]
+icon: "rss.svg"
+short_desc: "Creates RSS full text feeds."
+---
+
+[fivefilters full-text-rss](https://www.fivefilters.org/full-text-rss/)
+is a full-text RSS service. Free, open-source RSS feeds usually provide
+only a summary or the first few lines of the linked content. One way to read
+the entire page is a text-based web browser such as lynx or w3m.
+However, I want to do everything in [newsboat](https://github.com/newsboat/newsboat).
+With Full-Text RSS, you can retrieve the full text of individual articles or
+generate complete full-text RSS feeds. It enriches third-party RSS feeds with
+full-text articles.
+
+## Installation
+
+### Setting Up and Configuring
+
+Check which PHP version gets installed (e.g., php7.4-fpm) and adjust the later steps accordingly.
+
+```sh
+sudo apt update
+sudo apt install nginx php php-fpm php-tidy git unzip certbot python3-certbot-nginx
+```
+
+Download Full-Text RSS
+
+```sh
+cd /var/www
+sudo git clone https://bitbucket.org/fivefilters/full-text-rss.git
+cd full-text-rss
+sudo git reset --hard 384d52fd83361ffd6e7f28bd39b322970a015a28
+```
+
+Download Full-Text-RSS site config
+
+```sh
+sudo git clone https://github.com/fivefilters/ftr-site-config site_config
+```
+
+Set permissions:
+
+```sh
+sudo mkdir -p cache/rss
+sudo chown -R www-data:www-data cache site_config
+# www-data owns both trees, so only the owner needs write access;
+# X adds the execute bit to directories only, never to regular files.
+sudo chmod -R u=rwX,go=rX cache site_config
+```
+
+### Configuring nginx & certbot
+
+Create Nginx Config with Clean /feed + Rate Limiting
+
+Create a new site config:
+
+```sh
+sudo nano /etc/nginx/sites-available/fulltextrss
+```
+
+Paste this (adjust php7.4-fpm.sock if needed):
+
+```nginx
+# Rate limiting zone: 10 req/min per IP
+limit_req_zone $binary_remote_addr zone=ratelimit:10m rate=10r/m;
+
+server {
+    listen 80;
+    server_name rss.thesiah.xyz; # Change to your domain
+
+    root /var/www/full-text-rss;
+    index index.php;
+
+    # Rewrite clean URL /feed?url=... to makefulltextfeed.php
+    # (the query string is carried over by the rewrite).
+    location = /feed {
+        rewrite ^ /makefulltextfeed.php last;
+    }
+
+    # The rate limit is attached to the script itself, not to /feed:
+    # "rewrite ... last" restarts location matching, so a limit_req placed
+    # in the /feed location is never applied to the rewritten request.
+    # This also covers direct requests to /makefulltextfeed.php.
+    location = /makefulltextfeed.php {
+        limit_req zone=ratelimit burst=5;
+        include snippets/fastcgi-php.conf;
+        fastcgi_pass unix:/var/run/php/php7.4-fpm.sock; # keep in sync with the block below
+    }
+
+    location ~ \.php$ {
+        include snippets/fastcgi-php.conf;
+        fastcgi_pass unix:/var/run/php/php7.4-fpm.sock;
+    }
+
+    location ~ /\.ht {
+        deny all;
+    }
+}
+```
+
+Enable the site in nginx:
+
+```sh
+sudo ln -s /etc/nginx/sites-available/fulltextrss /etc/nginx/sites-enabled/
+sudo nginx -t && sudo systemctl reload nginx
+```
+
+Secure with HTTPS (Certbot)
+
+```sh
+sudo certbot --nginx -d rss.thesiah.xyz
+```
+
+### Custom Config
+
+Add custom_config.php:
+Save to: /var/www/full-text-rss/custom_config.php
+
+```php
+<?php
+/* Full-Text RSS config */
+if (!isset($options)) $options = new stdClass(); // create the options container only if nothing defined it earlier
+$options->enabled = true;
+$options->debug = true; // NOTE(review): debug is switched on; confirm that is intended for a publicly reachable server
+$options->default_entries = 200;
+$options->max_entries = 1000;
+$options->content = 'user'; // 'user' presumably lets the request decide; TODO confirm against the Full-Text RSS docs
+$options->summary = 'user';
+$options->rewrite_relative_urls = true;
+$options->exclude_items_on_fail = 'user';
+$options->singlepage = true;
+$options->multipage = true;
+$options->caching = true;
+$options->cache_time = 120;
+$options->cache_dir = dirname(__FILE__).'/cache'; // the "cache" directory next to this file
+$options->message_to_prepend = '';
+$options->message_to_append = '';
+$options->error_message = '[unable to retrieve full-text content]';
+$options->keep_enclosures = true;
+$options->detect_language = 'user';
+$options->user_submitted_config = false;
+$options->remove_native_ads = false;
+$options->admin_credentials = array('username'=>'admin', 'password'=>getenv('FTR_ADMIN_PASSWORD')); // password is read from the environment; getenv() yields false when the variable is unset
+$options->allowed_urls = array();
+$options->blocked_urls = array();
+$options->key_required = false;
+$options->api_keys = array();
+$options->default_entries_with_key = 5; // NOTE(review): lower than the keyless default_entries (200) above; confirm intended
+$options->max_entries_with_key = 10; // NOTE(review): lower than the keyless max_entries (1000) above; confirm intended
+$options->xss_filter = 'user';
+$options->favour_feed_titles = 'user';
+$options->allowed_parsers = array('libxml', 'html5php');
+$options->allow_parser_override = true;
+$options->cors = false;
+$options->proxy_servers = array();
+$options->proxy = true; // NOTE(review): enabled while proxy_servers above is empty; confirm intended
+$options->allow_proxy_override = true;
+$options->apc = true; // presumably needs the APC/APCu PHP extension to be installed; verify
+$options->smart_cache = true;
+$options->fingerprints = array( // HTML fragment => array('hostname'=>..., 'head'=>...)
+ '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),
+ '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
+ '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
+ '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)
+);
+$options->rewrite_url = array( // host pattern => array(search string => replacement string)
+ 'docs.google.com' => array('/Doc?' => '/View?'),
+ 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),
+ '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'),
+ 'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com')
+);
+$options->content_type_exc = array( // content type => array('action'=>..., 'name'=>...); every entry here uses the 'link' action
+ 'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
+ 'image' => array('action'=>'link', 'name'=>'Image'),
+ 'audio' => array('action'=>'link', 'name'=>'Audio'),
+ 'video' => array('action'=>'link', 'name'=>'Video')
+ );
+$options->cache_directory_level = 0;
+$options->cache_cleanup = 100;
+if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.5'); // define the version constant only once
+// Environment overrides. This section only runs when the file is named
+// config.php: it first loads custom_config.php (if present) and then lets
+// environment variables named ftr_<option> replace individual option values.
+if (basename(__FILE__) === 'config.php') {
+    if (file_exists(dirname(__FILE__).'/custom_config.php')) {
+        require_once dirname(__FILE__).'/custom_config.php';
+    }
+
+    foreach ($options as $_key=>&$_val) {
+        $_key = "ftr_$_key";
+        if (($_env = getenv($_key)) !== false) {
+            if (is_array($_val)) {
+                // Array options are not overridable, except the admin
+                // credentials, which are given as "username:password".
+                if ($_key === 'ftr_admin_credentials') {
+                    $_parts = array_map('trim', explode(':', $_env, 2));
+                    // Check the count up front: array_combine() with mismatched
+                    // lengths throws on PHP 8 instead of returning false.
+                    if (count($_parts) === 2) {
+                        $_val = array_combine(array('username', 'password'), $_parts);
+                    } else {
+                        $_val = array('username'=>'admin', 'password'=>'');
+                    }
+                }
+            } elseif ($_env === 'true' || $_env === 'false') {
+                $_val = ($_env === 'true');
+            } elseif (is_numeric($_env)) {
+                $_val = (int)$_env;
+            } else {
+                // Any other value is kept as a plain string. Without this
+                // branch the integer cast above was overwritten and
+                // non-numeric strings were never applied.
+                $_val = $_env;
+            }
+        }
+    }
+    // Drop the by-reference loop variable and the temporaries.
+    unset($_key, $_val, $_env, $_parts);
+}
+```
+
+### Local host
+
+Visit http://localhost:80 for the integrated web UI
+
+{{<img src="/pix/rss-webui.png" alt="A screenshot of the Full Text RSS" >}}
+
+- Article extraction: http://localhost/extract.php?url=[url]
+- Feed conversion: http://localhost/makefulltextfeed.php?url=[url]