From a8853cdc59ab5a266a3d993c69e737263b2ad630 Mon Sep 17 00:00:00 2001 From: John Xina <bingchilling@riseup.net> Date: Fri, 21 Jul 2023 18:04:54 +0800 Subject: [PATCH] add robots.txt support, default to strict policy --- app.py | 11 ++++++++++- config.toml.sample | 8 ++++++++ static/rules/robots_relaxed.txt | 6 ++++++ static/rules/robots_strict.txt | 2 ++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 static/rules/robots_relaxed.txt create mode 100644 static/rules/robots_strict.txt diff --git a/app.py b/app.py index c372991..b6a571a 100644 --- a/app.py +++ b/app.py @@ -17,7 +17,7 @@ import requests from urllib.parse import urlparse import asyncio -from aioflask import request, make_response, redirect, url_for +from aioflask import request, make_response, redirect, url_for, send_from_directory import subprocess from bs4 import BeautifulSoup @@ -90,6 +90,15 @@ async def test_view(): resp.set_cookie('theme', 'default') return resp +@app.route('/robots.txt') +def robots_txt(): + policy = appconf['site'].get('robots_policy') or 'strict' + + if policy == 'PLEASE_INDEX_EVERYTHING': + return '', 404 + + return send_from_directory(f'static/rules', f'robots_{policy}.txt') + ########################################## # Error handling ########################################## diff --git a/config.toml.sample b/config.toml.sample index 54334d3..196ef5c 100644 --- a/config.toml.sample +++ b/config.toml.sample @@ -12,6 +12,14 @@ site_allow_download = true # This will allow (potentially unsafe) error response to be showed directly on site. site_show_unsafe_error_response = false +# This controls whether the search engines will index your site or not. +# Available options: +# - 'strict', indexing is disallowed. +# - 'relaxed', search engines can index articles and search page. +# - 'PLEASE_INDEX_EVERYTHING', a VERY DANGEROUS option, may give you a lawsuit. +# Defaults to 'strict'.
+robots_policy = 'strict' + [flask] host = '0.0.0.0' diff --git a/static/rules/robots_relaxed.txt b/static/rules/robots_relaxed.txt new file mode 100644 index 0000000..9f80730 --- /dev/null +++ b/static/rules/robots_relaxed.txt @@ -0,0 +1,6 @@ +User-agent: * +Disallow: /vv/ +Disallow: /space/ +Disallow: /author/ +Disallow: /video_listen/ +Disallow: /video/ diff --git a/static/rules/robots_strict.txt b/static/rules/robots_strict.txt new file mode 100644 index 0000000..1f53798 --- /dev/null +++ b/static/rules/robots_strict.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / -- GitLab