diff options
Diffstat (limited to 'content/rss-full-text.md')
| -rw-r--r-- | content/rss-full-text.md | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/content/rss-full-text.md b/content/rss-full-text.md new file mode 100644 index 0000000..995797d --- /dev/null +++ b/content/rss-full-text.md @@ -0,0 +1,204 @@ +--- +title: "RSS Full Text" +date: 2025-07-01 +tags: ["service"] +icon: "rss.svg" +short_desc: "Creates RSS full text feeds." +--- + +[fivefilters full-text-rss](https://www.fivefilters.org/full-text-rss/) +is a Full-Text RSS service. Free open source RSS feeds usually provide +only a summary or a few lines of the URL contents. There is a way to see +the entire web page with a text-based web browser such as lynx, w3m, and so on. +However, I want to do everything in [newsboat](https://github.com/newsboat/newsboat). +With this, you can retrieve the full-text of individual articles or +complete full-text RSS feeds. It enriches third-party RSS feeds with +full text articles. + +## Installation + +### Setting Up and Configuring + +Check your PHP version (e.g., php7.4-fpm) and adjust the later steps accordingly. + +```sh +sudo apt update +sudo apt install nginx php php-fpm php-tidy git unzip certbot python3-certbot-nginx +``` + +Download Full-Text RSS + +```sh +cd /var/www +sudo git clone https://bitbucket.org/fivefilters/full-text-rss.git +cd full-text-rss +sudo git reset --hard 384d52fd83361ffd6e7f28bd39b322970a015a28 +``` + +Download Full-Text-RSS site config + +```sh +sudo git clone https://github.com/fivefilters/ftr-site-config site_config +``` + +Set permissions: + +```sh +sudo mkdir -p cache/rss +sudo chown -R www-data:www-data cache site_config +sudo chmod -R 777 cache site_config +``` + +### Configuring nginx & certbot + +Create Nginx Config with Clean /feed + Rate Limiting + +Create a new site config: + +```sh +sudo nano /etc/nginx/sites-available/fulltextrss +``` + +Paste this (adjust php7.4-fpm.sock if needed): + +```nginx +# Rate limiting zone: 10 req/min per IP +limit_req_zone $binary_remote_addr zone=ratelimit:10m rate=10r/m; + +server { + listen 80; + server_name rss.thesiah.xyz; # Change to your domain 
+ + root /var/www/full-text-rss; + index index.php; + + # Rewrite clean URL /feed?url=... to makefulltextfeed.php + location /feed { + rewrite ^/feed$ /makefulltextfeed.php last; + limit_req zone=ratelimit burst=5; + } + + location ~ \.php$ { + include snippets/fastcgi-php.conf; + fastcgi_pass unix:/var/run/php/php7.4-fpm.sock; + } + + location ~ /\.ht { + deny all; + } +} +``` + +Enable nginx on the site: + +```sh +sudo ln -s /etc/nginx/sites-available/fulltextrss /etc/nginx/sites-enabled/ +sudo nginx -t && sudo systemctl reload nginx +``` + +Secure with HTTPS (Certbot) + +```sh +sudo certbot --nginx -d rss.thesiah.xyz +``` + +### Custom Config + +Add custom_config.php: +Save to: /var/www/full-text-rss/custom_config.php + +```php +<?php +/* Full-Text RSS config */ +if (!isset($options)) $options = new stdClass(); +$options->enabled = true; +$options->debug = true; +$options->default_entries = 200; +$options->max_entries = 1000; +$options->content = 'user'; +$options->summary = 'user'; +$options->rewrite_relative_urls = true; +$options->exclude_items_on_fail = 'user'; +$options->singlepage = true; +$options->multipage = true; +$options->caching = true; +$options->cache_time = 120; +$options->cache_dir = dirname(__FILE__).'/cache'; +$options->message_to_prepend = ''; +$options->message_to_append = ''; +$options->error_message = '[unable to retrieve full-text content]'; +$options->keep_enclosures = true; +$options->detect_language = 'user'; +$options->user_submitted_config = false; +$options->remove_native_ads = false; +$options->admin_credentials = array('username'=>'admin', 'password'=>getenv('FTR_ADMIN_PASSWORD')); +$options->allowed_urls = array(); +$options->blocked_urls = array(); +$options->key_required = false; +$options->api_keys = array(); +$options->default_entries_with_key = 5; +$options->max_entries_with_key = 10; +$options->xss_filter = 'user'; +$options->favour_feed_titles = 'user'; +$options->allowed_parsers = array('libxml', 'html5php'); 
+$options->allow_parser_override = true; +$options->cors = false; +$options->proxy_servers = array(); +$options->proxy = true; +$options->allow_proxy_override = true; +$options->apc = true; +$options->smart_cache = true; +$options->fingerprints = array( + '<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true), + '<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true), + '<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true), + '<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true) +); +$options->rewrite_url = array( + 'docs.google.com' => array('/Doc?' => '/View?'), + 'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'), + '.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org'), + 'm.vanityfair.com' => array('m.vanityfair.com' => 'www.vanityfair.com') +); +$options->content_type_exc = array( + 'application/pdf' => array('action'=>'link', 'name'=>'PDF'), + 'image' => array('action'=>'link', 'name'=>'Image'), + 'audio' => array('action'=>'link', 'name'=>'Audio'), + 'video' => array('action'=>'link', 'name'=>'Video') + ); +$options->cache_directory_level = 0; +$options->cache_cleanup = 100; +if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.5'); +if (basename(__FILE__) == 'config.php') { + if (file_exists(dirname(__FILE__).'/custom_config.php')) { + require_once dirname(__FILE__).'/custom_config.php'; + } + + foreach ($options as $_key=>&$_val) { + $_key = "ftr_$_key"; + if (($_env = getenv($_key)) !== false) { + if (is_array($_val)) { + if ($_key === 'ftr_admin_credentials') { + $_val = array_combine(array('username', 'password'), array_map('trim', explode(':', $_env, 2))); + if ($_val === false) $_val = array('username'=>'admin', 'password'=>''); + } + } elseif ($_env === 'true' || $_env === 'false') { + $_val = ($_env 
=== 'true'); + } else { + /* Numeric values are cast to int; any other string is kept as-is. */ + $_val = is_numeric($_env) ? (int)$_env : $_env; + } + } + } + unset($_key, $_val, $_env); +} +``` + +### Local host + +Visit http://localhost:80 for the integrated web UI + +{{<img src="/pix/rss-webui.png" alt="A screenshot of the Full Text RSS" >}} + +Article extraction: http://localhost/extract.php?url=[url] +Feed conversion: http://localhost/makefulltextfeed.php?url=[url] |
