diff --git a/app.py b/app.py index c37299141199916e5b2f0ac3d5800c97fd1050a9..b6a571a9125e93af25d4f1ba3bd05872d4340056 100644 --- a/app.py +++ b/app.py @@ -17,7 +17,7 @@ import requests from urllib.parse import urlparse import asyncio -from aioflask import request, make_response, redirect, url_for +from aioflask import request, make_response, redirect, url_for, send_from_directory import subprocess from bs4 import BeautifulSoup @@ -90,6 +90,15 @@ async def test_view(): resp.set_cookie('theme', 'default') return resp +@app.route('/robots.txt') +def robots_txt(): + policy = appconf['site'].get('robots_policy') or 'strict' + + if policy == 'PLEASE_INDEX_EVERYTHING': + return '', 404 + + return send_from_directory(f'static/rules', f'robots_{policy}.txt') + ########################################## # Error handling ########################################## diff --git a/config.toml.sample b/config.toml.sample index 54334d3c190a635ce68d38f0e28a7da8f5a88717..196ef5c3f4c2f9e46808cce2769e9fe187609cbd 100644 --- a/config.toml.sample +++ b/config.toml.sample @@ -12,6 +12,14 @@ site_allow_download = true # This will allow (potentially unsafe) error response to be showed directly on site. site_show_unsafe_error_response = false +# This controls whether the search engines will index your site or not. +# Available options: +# - 'strict', indexing is disallowed. +# - 'relaxed', search engines can index articles and search page. +# - 'PLEASE_INDEX_EVERYTHING', a VERY DANGEROUS option, may give you a lawsuit. +# Defaults to 'strict'. 
+robots_policy = 'strict' + [flask] host = '0.0.0.0' diff --git a/static/rules/robots_relaxed.txt b/static/rules/robots_relaxed.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f80730dd2063f4c5aa09c2d88ad019a601ead9c --- /dev/null +++ b/static/rules/robots_relaxed.txt @@ -0,0 +1,6 @@ +User-agent: * +Disallow: /vv/ +Disallow: /space/ +Disallow: /author/ +Disallow: /video_listen/ +Disallow: /video/ diff --git a/static/rules/robots_strict.txt b/static/rules/robots_strict.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f53798bb4fe33c86020be7f10c44f29486fd190 --- /dev/null +++ b/static/rules/robots_strict.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: /