From 375360214ce32bb545c0fd8beef676e90d36980a Mon Sep 17 00:00:00 2001
From: Jaakko Keränen <jaakko.keranen@iki.fi>
Date: Wed, 31 May 2023 22:19:37 +0300
Subject: [PATCH 1/1] Added "rewrite" module; updated documentation
The Server class was refactored to make it possible to call the
entry point for a given request via extension modules.
README.md | 8 ++-
gmcapsule/__init__.py | 87 +++++++++++++++++++++--
gmcapsule/gemini.py | 120 ++++++++++++++++++--------------
gmcapsule/modules/10_rewrite.py | 82 ++++++++++++++++++++++
4 files changed, 236 insertions(+), 61 deletions(-)
create mode 100644 gmcapsule/modules/10_rewrite.py
diff --git a/README.md b/README.md
index 823db45..9f8f074 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Replace `<YOUR-INSTALL-PATH>` with the actual path of `gmcapsuled`. `pip` will i
Then you can do the usual:
systemctl --user daemon-reload
systemctl --user start gmcapsule
The log can be viewed via journalctl (or syslog):
@@ -46,6 +46,12 @@ The log can be viewed via journalctl (or syslog):
+### v0.4
+* Added built-in module "rewrite" that matches regular expressions against the request path and can rewrite the path or return a custom status for redirection, "Gone" messages, or other exceptional situations.
+* Extension module load order is determined after locating all modules from all directories. Previously, the order was local to each directory.
+* Added a new configuration section `[priority]` for overriding default module priorities determined from file names. This is useful for changing the priority of the built-in modules.
diff --git a/gmcapsule/__init__.py b/gmcapsule/__init__.py
index 3afb139..f93f021 100644
--- a/gmcapsule/__init__.py
+++ b/gmcapsule/__init__.py
@@ -58,8 +58,9 @@ The GmCapsule configuration file is in `INI format
 defined:
 - :ref:`server` — server settings
-- :ref:`static` — serving static files
 - :ref:`titan` — Titan upload settings
+- :ref:`static` — serving static files
+- :ref:`rewrite.*` — URL rewriting rules
 - :ref:`cgi` — General CGI settings
 - :ref:`cgi.*` — CGI programs
 - :ref:`gitview` — Git repository viewer settings
@@ -132,6 +133,50 @@ root : path [path...]
files will be served from `/home/user/gemini/example.com/`.
+rewrite.*
+---------
+Settings for the ``rewrite`` module that checks regular expressions against
+the request path and can rewrite the path or return a custom status. You can
+use this for internal remapping of directories and files, redirections,
+"Gone" statuses, or other exceptional situations.
+Each rewriting rule is a section that begins with ``rewrite.``.
+.. code-block:: ini
+protocol : string
gemini
and titan
.+host : string
server
section.+path : string
+repl : string
\\1
).+status : string
\\1
).cgi
@@ -440,7 +485,7 @@ from .gemini import Server, Cache
from .markdown import to_gemtext as markdown_to_gemtext
-__version__ = '0.3.2'
+__version__ = '0.4.0'
__all__ = [
'Config', 'Capsule', 'Cache',
'get_mime_type', 'markdown_to_gemtext'
@@ -629,24 +674,40 @@ class Capsule:
self.sv.add_cache(cache)
def load_modules(self):
name_pattern = re.compile(r'[0-9][0-9]_(.*)\.py')
# The configuration can override default priorities.
mod_priority = {}
if 'priority' in self.cfg.ini:
for name, priority in self.cfg.section('priority').items():
mod_priority[name] = int(priority)
# We will load all recognized modules.
name_pattern = re.compile(r'([0-9][0-9])_(.*)\.py')
dirs = []
for user_dir in self.cfg.mod_dirs():
if user_dir not in dirs:
dirs.append(user_dir)
dirs += [Path(__file__).parent.resolve() / 'modules']
mods = []
for mdir in dirs:
for mod_file in sorted(os.listdir(mdir)):
m = name_pattern.match(mod_file)
if m:
path = (mdir / mod_file).resolve()
name = m.group(1)
name = m.group(2)
loader = importlib.machinery.SourceFileLoader(name, str(path))
spec = importlib.util.spec_from_loader(name, loader)
mod = importlib.util.module_from_spec(spec)
loader.exec_module(mod)
print('MODULE:', mod.__doc__)
mod.init(self)
if name in mod_priority:
priority = mod_priority[name]
else:
priority = int(m.group(1))
mods.append((priority, name, mod))
# Initialize in priority order.
for _, _, mod in sorted(mods):
print(f'Init:', mod.__doc__)
mod.init(self)
def shutdown_event(self):
"""
@@ -657,6 +718,20 @@ class Capsule:
"""
return self.sv.shutdown_event
"""
Calls the registered entry point for a request.
Args:
request (Request): Request object.
Returns:
Tuple with (response, cache). The response can be binary data, text,
tuple with status and meta string, or tuple with status, meta, and body.
The cache is None if the data was not read from a cache.
"""
return self.sv.call_entrypoint(request)
def run(self):
"""
Start worker threads and begin accepting incoming connections. The
diff --git a/gmcapsule/gemini.py b/gmcapsule/gemini.py
index 05794f7..26ded98 100644
--- a/gmcapsule/gemini.py
+++ b/gmcapsule/gemini.py
@@ -16,6 +16,12 @@ import OpenSSL.crypto
from OpenSSL import SSL, crypto
+class GeminiError(Exception):
Exception.__init__(self, msg)
self.status = status
class AbortedIOError(Exception):
def __init__(self, msg):
Exception.__init__(self, msg)
@@ -271,6 +277,9 @@ class Request:
self.content_mime = content_mime
self.content = content
return f'{self.scheme}://{self.hostname}{self.path}{"?" + self.query if self.query else ""}'
def verify_callback(connection, cert, err_num, err_depth, ret_code):
#print("verify_callback:", connection, cert, ret_code)
@@ -445,72 +454,42 @@ class Worker(threading.Thread):
url = urlparse(request)
path = url.path
if url.port != None and url.port != self.server.port:
report_error(stream, 59, "Invalid port number")
return
if path == '':
path = '/'
hostname = url.hostname
entrypoint = self.server.find_entrypoint(url.scheme, hostname, path)
# Server name indication is required.
if url.port != None and url.port != self.server.port:
report_error(stream, 59, "Invalid port number")
return
if not stream.get_servername():
# Server name indication is required.
report_error(stream, 59, "Missing TLS server name indication")
return
if stream.get_servername().decode() != url.hostname:
if stream.get_servername().decode() != hostname:
report_error(stream, 53, "Proxy request refused")
return
caches = [] if (url.scheme != 'gemini' or identity or len(url.query) > 0) \
else self.server.caches
from_cache = None
# print(f'Request : {request}')
# print(f'Cert : {cl_cert}')
if entrypoint:
# Check the caches first.
for cache in caches:
media, content = cache.try_load(hostname + path)
if not media is None:
response = 20, media, content
from_cache = cache
if hasattr(content, '__len__'):
print('%d bytes from cache, %s' % (len(content), media))
else:
print('stream from cache,', media)
break
# Process the request normally if there is nothing cached.
if not from_cache:
try:
response = entrypoint(Request(
identity,
remote_address=from_addr,
scheme=url.scheme,
hostname=hostname,
path=path,
query=url.query if '?' in request else None,
content_token=req_token,
content_mime=req_mime,
content=data if len(data) else None
))
except Exception as x:
import traceback
traceback.print_exception(x)
report_error(stream, 40, 'Temporary failure')
return
try:
request = Request(
identity,
remote_address=from_addr,
scheme=url.scheme,
hostname=hostname,
path=path,
query=url.query if '?' in request else None,
content_token=req_token,
content_mime=req_mime,
content=data if len(data) else None
)
response, from_cache = self.server.call_entrypoint(request)
# Determine status code, meta line, and body content.
if type(response) == tuple:
if len(response) == 2:
status = response[0]
meta = response[1]
status, meta = response
response = ''
else:
status = response[0]
meta = response[1]
response = response[2]
status, meta, response = response
else:
status = 20
meta = 'text/gemini; charset=utf-8'
@@ -528,7 +507,7 @@ class Worker(threading.Thread):
# Save to cache.
if not from_cache and status == 20 and \
(type(response_data) == bytes or type(response_data) == bytearray):
for cache in caches:
for cache in self.server.caches:
if cache.save(hostname + path, meta, response_data):
break
@@ -536,8 +515,9 @@ class Worker(threading.Thread):
if hasattr(response_data, 'close'):
response_data.close()
else:
report_error(stream, 50, 'Permanent failure')
except GeminiError as error:
report_error(stream, error.status, str(error))
return
class Server:
@@ -693,7 +673,39 @@ class Server:
handler = path_pattern(path)
if handler:
return handler
except:
except Exception as x:
print(x)
return None
return None
entrypoint = self.find_entrypoint(request.scheme, request.hostname, request.path)
caches = self.caches if (request.scheme == 'gemini' and
not request.identity and
not request.query) else []
from_cache = None
if entrypoint:
# Check the caches first.
for cache in caches:
media, content = cache.try_load(request.hostname + request.path)
if not media is None:
response = 20, media, content
if hasattr(content, '__len__'):
print('%d bytes from cache, %s' % (len(content), media))
else:
print('stream from cache,', media)
return response, cache
# Process the request normally if there is nothing cached.
if not from_cache:
try:
return entrypoint(request), None
except Exception as x:
import traceback
traceback.print_exception(x)
raise GeminiError(40, 'Temporary failure')
raise GeminiError(50, 'Permanent failure')
diff --git a/gmcapsule/modules/10_rewrite.py b/gmcapsule/modules/10_rewrite.py
new file mode 100644
index 0000000..584e7a9
--- /dev/null
+++ b/gmcapsule/modules/10_rewrite.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2023 Jaakko Keränen <jaakko.keranen@iki.fi>
+# License: BSD-2-Clause
+"""Rewriter"""
+import re
+class PathRewriteHandler:
self.capsule = capsule
self.rewritten_path = rewritten_path
old_path = req.path
req.path = self.rewritten_path
# Don't allow rewriting the same request too many times.
if hasattr(req, 'num_rewrites'):
req.num_rewrites += 1
else:
req.num_rewrites = 1
if req.num_rewrites == 100:
return 40, "Stuck in rewrite loop: " + req.url()
print("[rewrite]", old_path, "->", req.path)
return self.capsule.call_entrypoint(req)[0]
+class Responder:
self.response = response
return self.response
+class Rewriter:
self.capsule = capsule
self.protocol = protocol
self.host = host
self.src_path = src_path
self.dst_path = dst_path
self.status = status
# If path matches a rewritten URL, return the handler object that calls the
# correct handler for the updated URL.
if self.dst_path:
new_path = self.src_path.sub(self.dst_path, path)
if new_path != path:
return PathRewriteHandler(self.capsule, new_path)
elif self.status:
m = self.src_path.match(path)
if m:
status = self.status
for i in range(self.src_path.groups + 1):
cap = m[i]
if cap:
status = status.replace(f'\\{i}', cap)
code, meta = status.split()
print("[rewrite]", code, meta)
return Responder((int(code), meta))
return None
+def init(capsule):
protocol = section.get('protocol', None)
host = section.get('host', cfg.hostnames()[0])
src_path = re.compile(section.get('path'))
dst_path = section.get('repl', None)
status = section.get('status', None)
for proto in [protocol] if protocol else ['gemini', 'titan']:
capsule.add(Rewriter(capsule, proto, host, src_path, dst_path, status),
None, # `Rewriter` will return a suitable handler callback.
host,
proto)
--
2.25.1
text/plain
This content has been proxied by September (ba2dc).