=> 375360214ce32bb545c0fd8beef676e90d36980a
[1mdiff --git a/README.md b/README.md[m [1mindex 823db45..9f8f074 100644[m [1m--- a/README.md[m [1m+++ b/README.md[m [36m@@ -37,7 +37,7 @@[m [mReplace `` with the actual path of `gmcapsuled`. `pip` will i[m Then you can do the usual:[m [m systemctl --user daemon-reload[m [31m- systemctl --user enable gmcapsule.service[m [32m+[m[32m systemctl --user enable gmcapsule[m systemctl --user start gmcapsule[m [m The log can be viewed via journalctl (or syslog):[m [36m@@ -46,6 +46,12 @@[m [mThe log can be viewed via journalctl (or syslog):[m [m ## Change log[m [m [32m+[m[32m### v0.4[m [32m+[m [32m+[m[32m* Added built-in module "rewrite" that matches regular expressions against the request path and can rewrite the path or return a custom status for redirection, "Gone" messages, or other exceptional situations.[m [32m+[m[32m* Extension module load order is determined after locating all modules from all directories. Previously, the order was local to each directory.[m [32m+[m[32m* Added a new configuration section `[priority]` for overriding default module priorities determined from file names. This is useful for changing the priority of the built-in modules.[m [32m+[m ### v0.3[m [m * Added a shutdown event for custom background workers.[m [1mdiff --git a/gmcapsule/__init__.py b/gmcapsule/__init__.py[m [1mindex 3afb139..f93f021 100644[m [1m--- a/gmcapsule/__init__.py[m [1m+++ b/gmcapsule/__init__.py[m [36m@@ -58,8 +58,9 @@[m [mThe GmCapsule configuration file is in `INI format[m defined:[m [m - :ref:`server` — server settings[m [31m-- :ref:`static` — serving static files[m - :ref:`titan` — Titan upload settings[m [32m+[m[32m- :ref:`static` — serving static files[m [32m+[m[32m- :ref:`rewrite.*` — URL rewriting rules[m - :ref:`cgi` — General CGI settings[m - :ref:`cgi.*` — CGI programs[m - :ref:`gitview` — Git repository viewer settings[m [36m@@ -132,6 +133,50 @@[m [mroot : path [path...][m files will be served from `/home/user/gemini/example.com/`.[m [m [m [32m+[m[32mrewrite.*[m [32m+[m[32m---------[m [32m+[m [32m+[m[32mSettings for the `rewrite` module that checks regular expressions against[m [32m+[m[32mthe request path and can rewrite the path or return a custom status. You can[m [32m+[m[32muse this for internal remapping of directories and files, redirections,[m [32m+[m[32m"Gone" statuses, or other exceptional situations.[m [32m+[m [32m+[m[32mEach rewriting rule is a section that begins with ``rewrite.``.[m [32m+[m [32m+[m[32m.. code-block:: ini[m [32m+[m [32m+[m[32m [rewrite.rename][m [32m+[m[32m path = ^/old-path/[m [32m+[m[32m repl = /new-location/[m [32m+[m [32m+[m[32m [rewrite.elsewhere][m [32m+[m[32m path = .*\\.gmi$[m [32m+[m[32m status = 31 gemini://mygemlog.space/\\1.gmi[m [32m+[m [32m+[m[32mprotocol : string[m [32m+[m[32m Protocol for the rewrite rule. If omitted, the rule applies to both[m [32m+[m[32m ``gemini`` and ``titan``.[m [32m+[m [32m+[m[32mhost : string[m [32m+[m[32m Hostname for the rewrite rule. If omitted, defaults to the first[m [32m+[m[32m hostname defined in the :ref:`server` section.[m [32m+[m [32m+[m[32mpath : string[m [32m+[m[32m Regular expression that is matched against the request path. You may use[m [32m+[m[32m capture groups and refer to them in the replacement text. Note that the[m [32m+[m[32m request path always begins with a slash.[m [32m+[m [32m+[m[32mrepl : string[m [32m+[m[32m Replacement path. The part of the request path that matches the "path"[m [32m+[m[32m pattern is replaced with this. You can use backslashes to refer to[m [32m+[m[32m capture groups (``\\1``).[m [32m+[m [32m+[m[32mstatus : string[m [32m+[m[32m Custom status to respond with. Must begin with the status code followed[m [32m+[m[32m by the meta line. You can use backslashes to refer to capture groups[m [32m+[m[32m (``\\1``).[m [32m+[m [32m+[m cgi[m ---[m [m [36m@@ -440,7 +485,7 @@[m [mfrom .gemini import Server, Cache[m from .markdown import to_gemtext as markdown_to_gemtext[m [m [m [31m-__version__ = '0.3.2'[m [32m+[m[32m__version__ = '0.4.0'[m __all__ = [[m 'Config', 'Capsule', 'Cache',[m 'get_mime_type', 'markdown_to_gemtext'[m [36m@@ -629,24 +674,40 @@[m [mclass Capsule:[m self.sv.add_cache(cache)[m [m def load_modules(self):[m [31m- name_pattern = re.compile(r'[0-9][0-9]_(.*)\.py')[m [32m+[m[32m # The configuration can override default priorities.[m [32m+[m[32m mod_priority = {}[m [32m+[m[32m if 'priority' in self.cfg.ini:[m [32m+[m[32m for name, priority in self.cfg.section('priority').items():[m [32m+[m[32m mod_priority[name] = int(priority)[m [32m+[m [32m+[m[32m # We will load all recognized modules.[m [32m+[m[32m name_pattern = re.compile(r'([0-9][0-9])_(.*)\.py')[m dirs = [][m for user_dir in self.cfg.mod_dirs():[m if user_dir not in dirs:[m dirs.append(user_dir)[m dirs += [Path(__file__).parent.resolve() / 'modules'][m [32m+[m[32m mods = [][m for mdir in dirs:[m for mod_file in sorted(os.listdir(mdir)):[m m = name_pattern.match(mod_file)[m if m:[m path = (mdir / mod_file).resolve()[m [31m- name = m.group(1)[m [32m+[m[32m name = m.group(2)[m loader = importlib.machinery.SourceFileLoader(name, str(path))[m spec = importlib.util.spec_from_loader(name, loader)[m mod = importlib.util.module_from_spec(spec)[m loader.exec_module(mod)[m [31m- print('MODULE:', mod.__doc__)[m [31m- mod.init(self)[m [32m+[m[32m if name in mod_priority:[m [32m+[m[32m priority = mod_priority[name][m [32m+[m[32m else:[m [32m+[m[32m priority = int(m.group(1))[m [32m+[m[32m mods.append((priority, name, mod))[m [32m+[m [32m+[m[32m # Initialize in priority order.[m [32m+[m[32m for _, _, mod in sorted(mods):[m [32m+[m[32m print(f'Init:', mod.__doc__)[m [32m+[m[32m mod.init(self)[m [m def shutdown_event(self):[m """[m [36m@@ -657,6 +718,20 @@[m [mclass Capsule:[m """[m return self.sv.shutdown_event[m [m [32m+[m[32m def call_entrypoint(self, request):[m [32m+[m[32m """[m [32m+[m[32m Calls the registered entry point for a request.[m [32m+[m [32m+[m[32m Args:[m [32m+[m[32m request (Request): Request object.[m [32m+[m [32m+[m[32m Returns:[m [32m+[m[32m Tuple with (response, cache). The response can be binary data, text,[m [32m+[m[32m tuple with status and meta string, or tuple with status, meta, and body.[m [32m+[m[32m The cache is None if the data was not read from a cache.[m [32m+[m[32m """[m [32m+[m[32m return self.sv.call_entrypoint(request)[m [32m+[m def run(self):[m """[m Start worker threads and begin accepting incoming connections. The[m [1mdiff --git a/gmcapsule/gemini.py b/gmcapsule/gemini.py[m [1mindex 05794f7..26ded98 100644[m [1m--- a/gmcapsule/gemini.py[m [1m+++ b/gmcapsule/gemini.py[m [36m@@ -16,6 +16,12 @@[m [mimport OpenSSL.crypto[m from OpenSSL import SSL, crypto[m [m [m [32m+[m[32mclass GeminiError(Exception):[m [32m+[m[32m def __init__(self, status, msg):[m [32m+[m[32m Exception.__init__(self, msg)[m [32m+[m[32m self.status = status[m [32m+[m [32m+[m class AbortedIOError(Exception):[m def __init__(self, msg):[m Exception.__init__(self, msg)[m [36m@@ -271,6 +277,9 @@[m [mclass Request:[m self.content_mime = content_mime[m self.content = content[m [m [32m+[m[32m def url(self):[m [32m+[m[32m return f'{self.scheme}://{self.hostname}{self.path}{"?" + self.query if self.query else ""}'[m [32m+[m [m def verify_callback(connection, cert, err_num, err_depth, ret_code):[m #print("verify_callback:", connection, cert, ret_code)[m [36m@@ -445,72 +454,42 @@[m [mclass Worker(threading.Thread):[m [m url = urlparse(request)[m path = url.path[m [31m- if url.port != None and url.port != self.server.port:[m [31m- report_error(stream, 59, "Invalid port number")[m [31m- return[m if path == '':[m path = '/'[m hostname = url.hostname[m [31m- entrypoint = self.server.find_entrypoint(url.scheme, hostname, path)[m [m [31m- # Server name indication is required.[m [32m+[m[32m if url.port != None and url.port != self.server.port:[m [32m+[m[32m report_error(stream, 59, "Invalid port number")[m [32m+[m[32m return[m if not stream.get_servername():[m [32m+[m[32m # Server name indication is required.[m report_error(stream, 59, "Missing TLS server name indication")[m return[m [31m- if stream.get_servername().decode() != url.hostname:[m [32m+[m[32m if stream.get_servername().decode() != hostname:[m report_error(stream, 53, "Proxy request refused")[m return[m [m [31m- caches = [] if (url.scheme != 'gemini' or identity or len(url.query) > 0) \[m [31m- else self.server.caches[m [31m- from_cache = None[m [31m-[m [31m- # print(f'Request : {request}')[m [31m- # print(f'Cert : {cl_cert}')[m [31m-[m [31m- if entrypoint:[m [31m- # Check the caches first.[m [31m- for cache in caches:[m [31m- media, content = cache.try_load(hostname + path)[m [31m- if not media is None:[m [31m- response = 20, media, content[m [31m- from_cache = cache[m [31m- if hasattr(content, '__len__'):[m [31m- print('%d bytes from cache, %s' % (len(content), media))[m [31m- else:[m [31m- print('stream from cache,', media)[m [31m- break[m [31m-[m [31m- # Process the request normally if there is nothing cached.[m [31m- if not from_cache:[m [31m- try:[m [31m- response = entrypoint(Request([m [31m- identity,[m [31m- remote_address=from_addr,[m [31m- scheme=url.scheme,[m [31m- hostname=hostname,[m [31m- path=path,[m [31m- query=url.query if '?' in request else None,[m [31m- content_token=req_token,[m [31m- content_mime=req_mime,[m [31m- content=data if len(data) else None[m [31m- ))[m [31m- except Exception as x:[m [31m- import traceback[m [31m- traceback.print_exception(x)[m [31m- report_error(stream, 40, 'Temporary failure')[m [31m- return[m [32m+[m[32m try:[m [32m+[m[32m request = Request([m [32m+[m[32m identity,[m [32m+[m[32m remote_address=from_addr,[m [32m+[m[32m scheme=url.scheme,[m [32m+[m[32m hostname=hostname,[m [32m+[m[32m path=path,[m [32m+[m[32m query=url.query if '?' in request else None,[m [32m+[m[32m content_token=req_token,[m [32m+[m[32m content_mime=req_mime,[m [32m+[m[32m content=data if len(data) else None[m [32m+[m[32m )[m [32m+[m[32m response, from_cache = self.server.call_entrypoint(request)[m [m # Determine status code, meta line, and body content.[m if type(response) == tuple:[m if len(response) == 2:[m [31m- status = response[0][m [31m- meta = response[1][m [32m+[m[32m status, meta = response[m response = ''[m else:[m [31m- status = response[0][m [31m- meta = response[1][m [31m- response = response[2][m [32m+[m[32m status, meta, response = response[m else:[m status = 20[m meta = 'text/gemini; charset=utf-8'[m [36m@@ -528,7 +507,7 @@[m [mclass Worker(threading.Thread):[m # Save to cache.[m if not from_cache and status == 20 and \[m (type(response_data) == bytes or type(response_data) == bytearray):[m [31m- for cache in caches:[m [32m+[m[32m for cache in self.server.caches:[m if cache.save(hostname + path, meta, response_data):[m break[m [m [36m@@ -536,8 +515,9 @@[m [mclass Worker(threading.Thread):[m if hasattr(response_data, 'close'):[m response_data.close()[m [m [31m- else:[m [31m- report_error(stream, 50, 'Permanent failure')[m [32m+[m[32m except GeminiError as error:[m [32m+[m[32m report_error(stream, error.status, str(error))[m [32m+[m[32m return[m [m [m class Server:[m [36m@@ -693,7 +673,39 @@[m [mclass Server:[m handler = path_pattern(path)[m if handler:[m return handler[m [31m- except:[m [32m+[m[32m except Exception as x:[m [32m+[m[32m print(x)[m return None[m [m return None[m [32m+[m [32m+[m[32m def call_entrypoint(self, request):[m [32m+[m[32m entrypoint = self.find_entrypoint(request.scheme, request.hostname, request.path)[m [32m+[m [32m+[m[32m caches = self.caches if (request.scheme == 'gemini' and[m [32m+[m[32m not request.identity and[m [32m+[m[32m not request.query) else [][m [32m+[m[32m from_cache = None[m [32m+[m [32m+[m[32m if entrypoint:[m [32m+[m[32m # Check the caches first.[m [32m+[m[32m for cache in caches:[m [32m+[m[32m media, content = cache.try_load(request.hostname + request.path)[m [32m+[m[32m if not media is None:[m [32m+[m[32m response = 20, media, content[m [32m+[m[32m if hasattr(content, '__len__'):[m [32m+[m[32m print('%d bytes from cache, %s' % (len(content), media))[m [32m+[m[32m else:[m [32m+[m[32m print('stream from cache,', media)[m [32m+[m[32m return response, cache[m [32m+[m [32m+[m[32m # Process the request normally if there is nothing cached.[m [32m+[m[32m if not from_cache:[m [32m+[m[32m try:[m [32m+[m[32m return entrypoint(request), None[m [32m+[m[32m except Exception as x:[m [32m+[m[32m import traceback[m [32m+[m[32m traceback.print_exception(x)[m [32m+[m[32m raise GeminiError(40, 'Temporary failure')[m [32m+[m [32m+[m[32m raise GeminiError(50, 'Permanent failure')[m [1mdiff --git a/gmcapsule/modules/10_rewrite.py b/gmcapsule/modules/10_rewrite.py[m [1mnew file mode 100644[m [1mindex 0000000..584e7a9[m [1m--- /dev/null[m [1m+++ b/gmcapsule/modules/10_rewrite.py[m [36m@@ -0,0 +1,82 @@[m [32m+[m[32m# Copyright (c) 2023 Jaakko Keränen [m [32m+[m[32m# License: BSD-2-Clause[m [32m+[m [32m+[m[32m"""Rewriter"""[m [32m+[m [32m+[m[32mimport re[m [32m+[m [32m+[m [32m+[m[32mclass PathRewriteHandler:[m [32m+[m[32m def __init__(self, capsule, rewritten_path):[m [32m+[m[32m self.capsule = capsule[m [32m+[m[32m self.rewritten_path = rewritten_path[m [32m+[m [32m+[m[32m def __call__(self, req):[m [32m+[m[32m old_path = req.path[m [32m+[m[32m req.path = self.rewritten_path[m [32m+[m [32m+[m[32m # Don't allow rewriting the same request too many times.[m [32m+[m[32m if hasattr(req, 'num_rewrites'):[m [32m+[m[32m req.num_rewrites += 1[m [32m+[m[32m else:[m [32m+[m[32m req.num_rewrites = 1[m [32m+[m[32m if req.num_rewrites == 100:[m [32m+[m[32m return 40, "Stuck in rewrite loop: " + req.url()[m [32m+[m [32m+[m[32m print("[rewrite]", old_path, "->", req.path)[m [32m+[m[32m return self.capsule.call_entrypoint(req)[0][m [32m+[m [32m+[m [32m+[m[32mclass Responder:[m [32m+[m[32m def __init__(self, response):[m [32m+[m[32m self.response = response[m [32m+[m [32m+[m[32m def __call__(self, req):[m [32m+[m[32m return self.response[m [32m+[m [32m+[m [32m+[m[32mclass Rewriter:[m [32m+[m[32m def __init__(self, capsule, protocol, host, src_path, dst_path, status):[m [32m+[m[32m self.capsule = capsule[m [32m+[m[32m self.protocol = protocol[m [32m+[m[32m self.host = host[m [32m+[m[32m self.src_path = src_path[m [32m+[m[32m self.dst_path = dst_path[m [32m+[m[32m self.status = status[m [32m+[m [32m+[m[32m def __call__(self, path):[m [32m+[m[32m # If path matches a rewritten URL, return the handler object that calls the[m [32m+[m[32m # correct handler for the updated URL.[m [32m+[m[32m if self.dst_path:[m [32m+[m[32m new_path = self.src_path.sub(self.dst_path, path)[m [32m+[m[32m if new_path != path:[m [32m+[m[32m return PathRewriteHandler(self.capsule, new_path)[m [32m+[m [32m+[m[32m elif self.status:[m [32m+[m[32m m = self.src_path.match(path)[m [32m+[m[32m if m:[m [32m+[m[32m status = self.status[m [32m+[m[32m for i in range(self.src_path.groups + 1):[m [32m+[m[32m cap = m[i][m [32m+[m[32m if cap:[m [32m+[m[32m status = status.replace(f'\\{i}', cap)[m [32m+[m[32m code, meta = status.split()[m [32m+[m[32m print("[rewrite]", code, meta)[m [32m+[m[32m return Responder((int(code), meta))[m [32m+[m [32m+[m[32m return None[m [32m+[m [32m+[m [32m+[m[32mdef init(capsule):[m [32m+[m[32m cfg = capsule.config()[m [32m+[m[32m for section in cfg.prefixed_sections('rewrite.').values():[m [32m+[m[32m protocol = section.get('protocol', None)[m [32m+[m[32m host = section.get('host', cfg.hostnames()[0])[m [32m+[m[32m src_path = re.compile(section.get('path'))[m [32m+[m[32m dst_path = section.get('repl', None)[m [32m+[m[32m status = section.get('status', None)[m [32m+[m[32m for proto in [protocol] if protocol else ['gemini', 'titan']:[m [32m+[m[32m capsule.add(Rewriter(capsule, proto, host, src_path, dst_path, status),[m [32m+[m[32m None, # `Rewriter` will return a suitable handler callback.[m [32m+[m[32m host,[m [32m+[m[32m proto)[m
text/gemini; charset=utf-8
This content has been proxied by September (ba2dc).