From a8853cdc59ab5a266a3d993c69e737263b2ad630 Mon Sep 17 00:00:00 2001
From: John Xina <bingchilling@riseup.net>
Date: Fri, 21 Jul 2023 18:04:54 +0800
Subject: [PATCH] add robots.txt support, default to strict policy

---
 app.py                          | 11 ++++++++++-
 config.toml.sample              |  8 ++++++++
 static/rules/robots_relaxed.txt |  6 ++++++
 static/rules/robots_strict.txt  |  2 ++
 4 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 static/rules/robots_relaxed.txt
 create mode 100644 static/rules/robots_strict.txt

diff --git a/app.py b/app.py
index c372991..b6a571a 100644
--- a/app.py
+++ b/app.py
@@ -17,7 +17,7 @@ import requests
 from urllib.parse import urlparse
 
 import asyncio
-from aioflask import request, make_response, redirect, url_for
+from aioflask import request, make_response, redirect, url_for, send_from_directory
 
 import subprocess
 from bs4 import BeautifulSoup
@@ -90,6 +90,15 @@ async def test_view():
     resp.set_cookie('theme', 'default')
     return resp
 
+@app.route('/robots.txt')
+def robots_txt():
+    policy = appconf['site'].get('robots_policy') or 'strict'
+    
+    if policy == 'PLEASE_INDEX_EVERYTHING':
+        return '', 404
+    
+    return send_from_directory(f'static/rules', f'robots_{policy}.txt')
+
 ##########################################
 # Error handling
 ##########################################
diff --git a/config.toml.sample b/config.toml.sample
index 54334d3..196ef5c 100644
--- a/config.toml.sample
+++ b/config.toml.sample
@@ -12,6 +12,14 @@ site_allow_download = true
 # This will allow (potentially unsafe) error response to be showed directly on site.
 site_show_unsafe_error_response = false
 
+# This controls whether the search engines will index your site or not.
+# Available options:
+#    - 'strict', indexing is disallowed.
+#    - 'relaxed', search engines can index articles and search page.
+#    - 'PLEASE_INDEX_EVERYTHING', a VERY DANGEROUS option, may give you a lawsuit.
+# Defaults to 'strict'.
+robots_policy = 'strict'
+
 [flask]
 host = '0.0.0.0'
 
diff --git a/static/rules/robots_relaxed.txt b/static/rules/robots_relaxed.txt
new file mode 100644
index 0000000..9f80730
--- /dev/null
+++ b/static/rules/robots_relaxed.txt
@@ -0,0 +1,6 @@
+User-agent: *
+Disallow: /vv/
+Disallow: /space/
+Disallow: /author/
+Disallow: /video_listen/
+Disallow: /video/
diff --git a/static/rules/robots_strict.txt b/static/rules/robots_strict.txt
new file mode 100644
index 0000000..1f53798
--- /dev/null
+++ b/static/rules/robots_strict.txt
@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: /
-- 
GitLab