[1mdiff --git a/README.md b/README.md[m
[1mindex 823db45..9f8f074 100644[m
[1m--- a/README.md[m
[1m+++ b/README.md[m
[36m@@ -37,7 +37,7 @@[m [mReplace `<YOUR-INSTALL-PATH>` with the actual path of `gmcapsuled`. `pip` will i[m
Then you can do the usual:[m
[m
systemctl --user daemon-reload[m
[31m- systemctl --user enable gmcapsule.service[m
[32m+[m[32m systemctl --user enable gmcapsule[m
systemctl --user start gmcapsule[m
[m
The log can be viewed via journalctl (or syslog):[m
[36m@@ -46,6 +46,12 @@[m [mThe log can be viewed via journalctl (or syslog):[m
[m
[m
[32m+[m[32m### v0.4[m
[32m+[m
[32m+[m[32m* Added built-in module "rewrite" that matches regular expressions against the request path and can rewrite the path or return a custom status for redirection, "Gone" messages, or other exceptional situations.[m
[32m+[m[32m* Extension module load order is determined after locating all modules from all directories. Previously, the order was local to each directory.[m
[32m+[m[32m* Added a new configuration section `[priority]` for overriding default module priorities determined from file names. This is useful for changing the priority of the built-in modules.[m
[32m+[m
[m
[1mdiff --git a/gmcapsule/__init__.py b/gmcapsule/__init__.py[m
[1mindex 3afb139..f93f021 100644[m
[1m--- a/gmcapsule/__init__.py[m
[1m+++ b/gmcapsule/__init__.py[m
[36m@@ -58,8 +58,9 @@[m [mThe GmCapsule configuration file is in `INI format[m
defined:[m
[m
- :ref:`server` — server settings[m
[31m-- :ref:`static` — serving static files[m
- :ref:`titan` — Titan upload settings[m
[32m+[m[32m- :ref:`static` — serving static files[m
[32m+[m[32m- :ref:`rewrite.*` — URL rewriting rules[m
- :ref:`cgi` — General CGI settings[m
- :ref:`cgi.*` — CGI programs[m
- :ref:`gitview` — Git repository viewer settings[m
[36m@@ -132,6 +133,50 @@[m [mroot : path [path...][m
files will be served from `/home/user/gemini/example.com/`.[m
[m
[m
[32m+[m[32mrewrite.*[m
[32m+[m[32m---------[m
[32m+[m
[32m+[m[32mSettings for the ``rewrite`` module that checks regular expressions against[m
[32m+[m[32mthe request path and can rewrite the path or return a custom status. You can[m
[32m+[m[32muse this for internal remapping of directories and files, redirections,[m
[32m+[m[32m"Gone" statuses, or other exceptional situations.[m
[32m+[m
[32m+[m[32mEach rewriting rule is a section that begins with ``rewrite.``.[m
[32m+[m
[32m+[m[32m.. code-block:: ini[m
[32m+[m
[32m+[m[32m [rewrite.rename][m
[32m+[m[32m path = ^/old-path/[m
[32m+[m[32m repl = /new-location/[m
[32m+[m
[32m+[m[32m [rewrite.elsewhere][m
[32m+[m[32m path = ^/(.*)\.gmi$[m
[32m+[m[32m status = 31 gemini://mygemlog.space/\1.gmi[m
[32m+[m
[32m+[m[32mprotocol : string[m
[32m+[m[32m Protocol for the rewrite rule. If omitted, the rule applies to both[m
[32m+[m[32m ``gemini`` and ``titan``.[m
[32m+[m
[32m+[m[32mhost : string[m
[32m+[m[32m Hostname for the rewrite rule. If omitted, defaults to the first[m
[32m+[m[32m hostname defined in the :ref:`server` section.[m
[32m+[m
[32m+[m[32mpath : string[m
[32m+[m[32m Regular expression that is matched against the request path. You may use[m
[32m+[m[32m capture groups and refer to them in the replacement text. Note that the[m
[32m+[m[32m request path always begins with a slash.[m
[32m+[m
[32m+[m[32mrepl : string[m
[32m+[m[32m Replacement path. The part of the request path that matches the "path"[m
[32m+[m[32m pattern is replaced with this. You can use backslashes to refer to[m
[32m+[m[32m capture groups (``\\1``).[m
[32m+[m
[32m+[m[32mstatus : string[m
[32m+[m[32m Custom status to respond with. Must begin with the status code followed[m
[32m+[m[32m by the meta line. You can use backslashes to refer to capture groups[m
[32m+[m[32m (``\\1``).[m
[32m+[m
[32m+[m
cgi[m
---[m
[m
[36m@@ -440,7 +485,7 @@[m [mfrom .gemini import Server, Cache[m
from .markdown import to_gemtext as markdown_to_gemtext[m
[m
[m
[31m-version = '0.3.2'[m
[32m+[m[32m__version__ = '0.4.0'[m
all = [[m
'Config', 'Capsule', 'Cache',[m
'get_mime_type', 'markdown_to_gemtext'[m
[36m@@ -629,24 +674,40 @@[m [mclass Capsule:[m
self.sv.add_cache(cache)[m
[m
def load_modules(self):[m
[31m- name_pattern = re.compile(r'[0-9][0-9]_(.*).py')[m
[32m+[m[32m # The configuration can override default priorities.[m
[32m+[m[32m mod_priority = {}[m
[32m+[m[32m if 'priority' in self.cfg.ini:[m
[32m+[m[32m for name, priority in self.cfg.section('priority').items():[m
[32m+[m[32m mod_priority[name] = int(priority)[m
[32m+[m
[32m+[m[32m # We will load all recognized modules.[m
[32m+[m[32m name_pattern = re.compile(r'([0-9][0-9])_(.*).py')[m
dirs = [][m
for user_dir in self.cfg.mod_dirs():[m
if user_dir not in dirs:[m
dirs.append(user_dir)[m
dirs += [Path(__file__).parent.resolve() / 'modules'][m
[32m+[m[32m mods = [][m
for mdir in dirs:[m
for mod_file in sorted(os.listdir(mdir)):[m
m = name_pattern.match(mod_file)[m
if m:[m
path = (mdir / mod_file).resolve()[m
[31m- name = m.group(1)[m
[32m+[m[32m name = m.group(2)[m
loader = importlib.machinery.SourceFileLoader(name, str(path))[m
spec = importlib.util.spec_from_loader(name, loader)[m
mod = importlib.util.module_from_spec(spec)[m
loader.exec_module(mod)[m
[31m- print('MODULE:', mod.doc)[m
[31m- mod.init(self)[m
[32m+[m[32m if name in mod_priority:[m
[32m+[m[32m priority = mod_priority[name][m
[32m+[m[32m else:[m
[32m+[m[32m priority = int(m.group(1))[m
[32m+[m[32m mods.append((priority, name, mod))[m
[32m+[m
[32m+[m[32m # Initialize in priority order.[m
[32m+[m[32m for _, _, mod in sorted(mods):[m
[32m+[m[32m print(f'Init:', mod.doc)[m
[32m+[m[32m mod.init(self)[m
[m
def shutdown_event(self):[m
"""[m
[36m@@ -657,6 +718,20 @@[m [mclass Capsule:[m
"""[m
return self.sv.shutdown_event[m
[m
[32m+[m[32m def call_entrypoint(self, request):[m
[32m+[m[32m """[m
[32m+[m[32m Calls the registered entry point for a request.[m
[32m+[m
[32m+[m[32m Args:[m
[32m+[m[32m request (Request): Request object.[m
[32m+[m
[32m+[m[32m Returns:[m
[32m+[m[32m Tuple with (response, cache). The response can be binary data, text,[m
[32m+[m[32m tuple with status and meta string, or tuple with status, meta, and body.[m
[32m+[m[32m The cache is None if the data was not read from a cache.[m
[32m+[m[32m """[m
[32m+[m[32m return self.sv.call_entrypoint(request)[m
[32m+[m
def run(self):[m
"""[m
Start worker threads and begin accepting incoming connections. The[m
[1mdiff --git a/gmcapsule/gemini.py b/gmcapsule/gemini.py[m
[1mindex 05794f7..26ded98 100644[m
[1m--- a/gmcapsule/gemini.py[m
[1m+++ b/gmcapsule/gemini.py[m
[36m@@ -16,6 +16,12 @@[m [mimport OpenSSL.crypto[m
from OpenSSL import SSL, crypto[m
[m
[m
[32m+[m[32mclass GeminiError(Exception):[m
[32m+[m[32m def init(self, status, msg):[m
[32m+[m[32m Exception.init(self, msg)[m
[32m+[m[32m self.status = status[m
[32m+[m
[32m+[m
class AbortedIOError(Exception):[m
def __init__(self, msg):[m
Exception.__init__(self, msg)[m
[36m@@ -271,6 +277,9 @@[m [mclass Request:[m
self.content_mime = content_mime[m
self.content = content[m
[m
[32m+[m[32m def url(self):[m
[32m+[m[32m return f'{self.scheme}://{self.hostname}{self.path}{"?" + self.query if self.query else ""}'[m
[32m+[m
[m
def verify_callback(connection, cert, err_num, err_depth, ret_code):[m
#print("verify_callback:", connection, cert, ret_code)[m
[36m@@ -445,72 +454,42 @@[m [mclass Worker(threading.Thread):[m
[m
url = urlparse(request)[m
path = url.path[m
[31m- if url.port != None and url.port != self.server.port:[m
[31m- report_error(stream, 59, "Invalid port number")[m
[31m- return[m
if path == '':[m
path = '/'[m
hostname = url.hostname[m
[31m- entrypoint = self.server.find_entrypoint(url.scheme, hostname, path)[m
[m
[31m- # Server name indication is required.[m
[32m+[m[32m if url.port != None and url.port != self.server.port:[m
[32m+[m[32m report_error(stream, 59, "Invalid port number")[m
[32m+[m[32m return[m
if not stream.get_servername():[m
[32m+[m[32m # Server name indication is required.[m
report_error(stream, 59, "Missing TLS server name indication")[m
return[m
[31m- if stream.get_servername().decode() != url.hostname:[m
[32m+[m[32m if stream.get_servername().decode() != hostname:[m
report_error(stream, 53, "Proxy request refused")[m
return[m
[m
[31m- caches = [] if (url.scheme != 'gemini' or identity or len(url.query) > 0) \[m
[31m- else self.server.caches[m
[31m- from_cache = None[m
[31m-[m
[31m- # print(f'Request : {request}')[m
[31m- # print(f'Cert : {cl_cert}')[m
[31m-[m
[31m- if entrypoint:[m
[31m- # Check the caches first.[m
[31m- for cache in caches:[m
[31m- media, content = cache.try_load(hostname + path)[m
[31m- if not media is None:[m
[31m- response = 20, media, content[m
[31m- from_cache = cache[m
[31m- if hasattr(content, 'len'):[m
[31m- print('%d bytes from cache, %s' % (len(content), media))[m
[31m- else:[m
[31m- print('stream from cache,', media)[m
[31m- break[m
[31m-[m
[31m- # Process the request normally if there is nothing cached.[m
[31m- if not from_cache:[m
[31m- try:[m
[31m- response = entrypoint(Request([m
[31m- identity,[m
[31m- remote_address=from_addr,[m
[31m- scheme=url.scheme,[m
[31m- hostname=hostname,[m
[31m- path=path,[m
[31m- query=url.query if '?' in request else None,[m
[31m- content_token=req_token,[m
[31m- content_mime=req_mime,[m
[31m- content=data if len(data) else None[m
[31m- ))[m
[31m- except Exception as x:[m
[31m- import traceback[m
[31m- traceback.print_exception(x)[m
[31m- report_error(stream, 40, 'Temporary failure')[m
[31m- return[m
[32m+[m[32m try:[m
[32m+[m[32m request = Request([m
[32m+[m[32m identity,[m
[32m+[m[32m remote_address=from_addr,[m
[32m+[m[32m scheme=url.scheme,[m
[32m+[m[32m hostname=hostname,[m
[32m+[m[32m path=path,[m
[32m+[m[32m query=url.query if '?' in request else None,[m
[32m+[m[32m content_token=req_token,[m
[32m+[m[32m content_mime=req_mime,[m
[32m+[m[32m content=data if len(data) else None[m
[32m+[m[32m )[m
[32m+[m[32m response, from_cache = self.server.call_entrypoint(request)[m
[m
# Determine status code, meta line, and body content.[m
if type(response) == tuple:[m
if len(response) == 2:[m
[31m- status = response[0][m
[31m- meta = response[1][m
[32m+[m[32m status, meta = response[m
response = ''[m
else:[m
[31m- status = response[0][m
[31m- meta = response[1][m
[31m- response = response[2][m
[32m+[m[32m status, meta, response = response[m
else:[m
status = 20[m
meta = 'text/gemini; charset=utf-8'[m
[36m@@ -528,7 +507,7 @@[m [mclass Worker(threading.Thread):[m
# Save to cache.[m
if not from_cache and status == 20 and \[m
(type(response_data) == bytes or type(response_data) == bytearray):[m
[31m- for cache in caches:[m
[32m+[m[32m for cache in self.server.caches:[m
if cache.save(hostname + path, meta, response_data):[m
break[m
[m
[36m@@ -536,8 +515,9 @@[m [mclass Worker(threading.Thread):[m
if hasattr(response_data, 'close'):[m
response_data.close()[m
[m
[31m- else:[m
[31m- report_error(stream, 50, 'Permanent failure')[m
[32m+[m[32m except GeminiError as error:[m
[32m+[m[32m report_error(stream, error.status, str(error))[m
[32m+[m[32m return[m
[m
[m
class Server:[m
[36m@@ -693,7 +673,39 @@[m [mclass Server:[m
handler = path_pattern(path)[m
if handler:[m
return handler[m
[31m- except:[m
[32m+[m[32m except Exception as x:[m
[32m+[m[32m print(x)[m
return None[m
[m
return None[m
[32m+[m
[32m+[m[32m def call_entrypoint(self, request):[m
[32m+[m[32m entrypoint = self.find_entrypoint(request.scheme, request.hostname, request.path)[m
[32m+[m
[32m+[m[32m caches = self.caches if (request.scheme == 'gemini' and[m
[32m+[m[32m not request.identity and[m
[32m+[m[32m not request.query) else [][m
[32m+[m[32m from_cache = None[m
[32m+[m
[32m+[m[32m if entrypoint:[m
[32m+[m[32m # Check the caches first.[m
[32m+[m[32m for cache in caches:[m
[32m+[m[32m media, content = cache.try_load(request.hostname + request.path)[m
[32m+[m[32m if not media is None:[m
[32m+[m[32m response = 20, media, content[m
[32m+[m[32m if hasattr(content, 'len'):[m
[32m+[m[32m print('%d bytes from cache, %s' % (len(content), media))[m
[32m+[m[32m else:[m
[32m+[m[32m print('stream from cache,', media)[m
[32m+[m[32m return response, cache[m
[32m+[m
[32m+[m[32m # Process the request normally if there is nothing cached.[m
[32m+[m[32m if not from_cache:[m
[32m+[m[32m try:[m
[32m+[m[32m return entrypoint(request), None[m
[32m+[m[32m except Exception as x:[m
[32m+[m[32m import traceback[m
[32m+[m[32m traceback.print_exception(x)[m
[32m+[m[32m raise GeminiError(40, 'Temporary failure')[m
[32m+[m
[32m+[m[32m raise GeminiError(50, 'Permanent failure')[m
[1mdiff --git a/gmcapsule/modules/10_rewrite.py b/gmcapsule/modules/10_rewrite.py[m
[1mnew file mode 100644[m
[1mindex 0000000..584e7a9[m
[1m--- /dev/null[m
[1m+++ b/gmcapsule/modules/10_rewrite.py[m
[36m@@ -0,0 +1,82 @@[m
[32m+[m[32m# Copyright (c) 2023 Jaakko Keränen jaakko.keranen@iki.fi[m
[32m+[m[32m# License: BSD-2-Clause[m
[32m+[m
[32m+[m[32m"""Rewriter"""[m
[32m+[m
[32m+[m[32mimport re[m
[32m+[m
[32m+[m
[32m+[m[32mclass PathRewriteHandler:[m
[32m+[m[32m def init(self, capsule, rewritten_path):[m
[32m+[m[32m self.capsule = capsule[m
[32m+[m[32m self.rewritten_path = rewritten_path[m
[32m+[m
[32m+[m[32m def call(self, req):[m
[32m+[m[32m old_path = req.path[m
[32m+[m[32m req.path = self.rewritten_path[m
[32m+[m
[32m+[m[32m # Don't allow rewriting the same request too many times.[m
[32m+[m[32m if hasattr(req, 'num_rewrites'):[m
[32m+[m[32m req.num_rewrites += 1[m
[32m+[m[32m else:[m
[32m+[m[32m req.num_rewrites = 1[m
[32m+[m[32m if req.num_rewrites == 100:[m
[32m+[m[32m return 40, "Stuck in rewrite loop: " + req.url()[m
[32m+[m
[32m+[m[32m print("[rewrite]", old_path, "->", req.path)[m
[32m+[m[32m return self.capsule.call_entrypoint(req)[0][m
[32m+[m
[32m+[m
[32m+[m[32mclass Responder:[m
[32m+[m[32m def init(self, response):[m
[32m+[m[32m self.response = response[m
[32m+[m
[32m+[m[32m def call(self, req):[m
[32m+[m[32m return self.response[m
[32m+[m
[32m+[m
[32m+[m[32mclass Rewriter:[m
[32m+[m[32m def init(self, capsule, protocol, host, src_path, dst_path, status):[m
[32m+[m[32m self.capsule = capsule[m
[32m+[m[32m self.protocol = protocol[m
[32m+[m[32m self.host = host[m
[32m+[m[32m self.src_path = src_path[m
[32m+[m[32m self.dst_path = dst_path[m
[32m+[m[32m self.status = status[m
[32m+[m
[32m+[m[32m def call(self, path):[m
[32m+[m[32m # If path matches a rewritten URL, return the handler object that calls the[m
[32m+[m[32m # correct handler for the updated URL.[m
[32m+[m[32m if self.dst_path:[m
[32m+[m[32m new_path = self.src_path.sub(self.dst_path, path)[m
[32m+[m[32m if new_path != path:[m
[32m+[m[32m return PathRewriteHandler(self.capsule, new_path)[m
[32m+[m
[32m+[m[32m elif self.status:[m
[32m+[m[32m m = self.src_path.match(path)[m
[32m+[m[32m if m:[m
[32m+[m[32m status = self.status[m
[32m+[m[32m for i in range(self.src_path.groups + 1):[m
[32m+[m[32m cap = m[i][m
[32m+[m[32m if cap:[m
[32m+[m[32m status = status.replace(f'\{i}', cap)[m
[32m+[m[32m code, meta = status.split()[m
[32m+[m[32m print("[rewrite]", code, meta)[m
[32m+[m[32m return Responder((int(code), meta))[m
[32m+[m
[32m+[m[32m return None[m
[32m+[m
[32m+[m
[32m+[m[32mdef init(capsule):[m
[32m+[m[32m cfg = capsule.config()[m
[32m+[m[32m for section in cfg.prefixed_sections('rewrite.').values():[m
[32m+[m[32m protocol = section.get('protocol', None)[m
[32m+[m[32m host = section.get('host', cfg.hostnames()[0])[m
[32m+[m[32m src_path = re.compile(section.get('path'))[m
[32m+[m[32m dst_path = section.get('repl', None)[m
[32m+[m[32m status = section.get('status', None)[m
[32m+[m[32m for proto in [protocol] if protocol else ['gemini', 'titan']:[m
[32m+[m[32m capsule.add(Rewriter(capsule, proto, host, src_path, dst_path, status),[m
[32m+[m[32m None, # `Rewriter` will return a suitable handler callback.[m
[32m+[m[32m host,[m
[32m+[m[32m proto)[m
text/plain
This content has been proxied by September (ba2dc).