diff --git a/README.md b/README.md

index 823db45..9f8f074 100644

--- a/README.md

+++ b/README.md

@@ -37,7 +37,7 @@ Replace <YOUR-INSTALL-PATH> with the actual path of gmcapsuled. pip will i

Then you can do the usual:



 systemctl --user daemon-reload

- systemctl --user enable gmcapsule.service

+ systemctl --user enable gmcapsule

 systemctl --user start gmcapsule



The log can be viewed via journalctl (or syslog):

@@ -46,6 +46,12 @@ The log can be viewed via journalctl (or syslog):



Change log



+### v0.4

+

+* Added built-in module "rewrite" that matches regular expressions against the request path and can rewrite the path or return a custom status for redirection, "Gone" messages, or other exceptional situations.

+* Extension module load order is determined after locating all modules from all directories. Previously, the order was local to each directory.

+* Added a new configuration section [priority] for overriding default module priorities determined from file names. This is useful for changing the priority of the built-in modules.

+

v0.3



diff --git a/gmcapsule/__init__.py b/gmcapsule/__init__.py

index 3afb139..f93f021 100644

--- a/gmcapsule/__init__.py

+++ b/gmcapsule/__init__.py

@@ -58,8 +58,9 @@ The GmCapsule configuration file is in `INI format

defined:



-- :ref:`static` — serving static files

+- :ref:`static` — serving static files

+- :ref:`rewrite.*` — URL rewriting rules

@@ -132,6 +133,50 @@ root : path [path...]

 files will be served from `/home/user/gemini/example.com/`.





+rewrite.*

+---------

+

+Settings for the rewrite module that checks regular expressions against

+the request path and can rewrite the path or return a custom status. You can

+use this for internal remapping of directories and files, redirections,

+"Gone" statuses, or other exceptional situations.

+

+Each rewriting rule is a section that begins with ``rewrite.``.

+

+.. code-block:: ini

+

+ [rewrite.rename]

+ path = ^/old-path/

+ repl = /new-location/

+

+ [rewrite.elsewhere]

+ path = .*\.gmi$

+ status = 31 gemini://mygemlog.space/\1.gmi

+

+protocol : string

+ Protocol for the rewrite rule. If omitted, the rule applies to both

+ gemini and titan.

+

+host : string

+ Hostname for the rewrite rule. If omitted, defaults to the first

+ hostname defined in the :ref:`server` section.

+

+path : string

+ Regular expression that is matched against the request path. You may use

+ capture groups and refer to them in the replacement text. Note that the

+ request path always begins with a slash.

+

+repl : string

+ Replacement path. The part of the request path that matches the "path"

+ pattern is replaced with this. You can use backslashes to refer to

+ capture groups (\\1).

+

+status : string

+ Custom status to respond with. Must begin with the status code followed

+ by the meta line. You can use backslashes to refer to capture groups

+ (\\1).

+

+

cgi

---



@@ -440,7 +485,7 @@ from .gemini import Server, Cache

from .markdown import to_gemtext as markdown_to_gemtext





-__version__ = '0.3.2'

+__version__ = '0.4.0'

__all__ = [

 'Config', 'Capsule', 'Cache',

 'get_mime_type', 'markdown_to_gemtext'

@@ -629,24 +674,40 @@ class Capsule:

     self.sv.add_cache(cache)



 def load_modules(self):

- name_pattern = re.compile(r'[0-9][0-9]_(.*).py')

+ # The configuration can override default priorities.

+ mod_priority = {}

+ if 'priority' in self.cfg.ini:

+ for name, priority in self.cfg.section('priority').items():

+ mod_priority[name] = int(priority)

+

+ # We will load all recognized modules.

+ name_pattern = re.compile(r'([0-9][0-9])_(.*).py')

     dirs = []

     for user_dir in self.cfg.mod_dirs():

         if user_dir not in dirs:

             dirs.append(user_dir)

     dirs += [Path(__file__).parent.resolve() / 'modules']

+ mods = []

     for mdir in dirs:

         for mod_file in sorted(os.listdir(mdir)):

             m = name_pattern.match(mod_file)

             if m:

                 path = (mdir / mod_file).resolve()

- name = m.group(1)

+ name = m.group(2)

                 loader = importlib.machinery.SourceFileLoader(name, str(path))

                 spec = importlib.util.spec_from_loader(name, loader)

                 mod = importlib.util.module_from_spec(spec)

                 loader.exec_module(mod)

- print('MODULE:', mod.__doc__)

- mod.init(self)

+ if name in mod_priority:

+ priority = mod_priority[name]

+ else:

+ priority = int(m.group(1))

+ mods.append((priority, name, mod))

+

+ # Initialize in priority order.

+ for _, _, mod in sorted(mods):

+ print(f'Init:', mod.__doc__)

+ mod.init(self)



 def shutdown_event(self):

     """

@@ -657,6 +718,20 @@ class Capsule:

     """

     return self.sv.shutdown_event



+ def call_entrypoint(self, request):

+ """

+ Calls the registered entry point for a request.

+

+ Args:

+ request (Request): Request object.

+

+ Returns:

+ Tuple with (response, cache). The response can be binary data, text,

+ tuple with status and meta string, or tuple with status, meta, and body.

+ The cache is None if the data was not read from a cache.

+ """

+ return self.sv.call_entrypoint(request)

+

 def run(self):

     """

     Start worker threads and begin accepting incoming connections. The

diff --git a/gmcapsule/gemini.py b/gmcapsule/gemini.py

index 05794f7..26ded98 100644

--- a/gmcapsule/gemini.py

+++ b/gmcapsule/gemini.py

@@ -16,6 +16,12 @@ import OpenSSL.crypto

from OpenSSL import SSL, crypto





+class GeminiError(Exception):

+ def __init__(self, status, msg):

+ Exception.__init__(self, msg)

+ self.status = status

+

+

class AbortedIOError(Exception):

 def __init__(self, msg):

     Exception.__init__(self, msg)

@@ -271,6 +277,9 @@ class Request:

     self.content_mime = content_mime

     self.content = content



+ def url(self):

+ return f'{self.scheme}://{self.hostname}{self.path}{"?" + self.query if self.query else ""}'

+



def verify_callback(connection, cert, err_num, err_depth, ret_code):

 #print("verify_callback:", connection, cert, ret_code)

@@ -445,72 +454,42 @@ class Worker(threading.Thread):



     url = urlparse(request)

     path = url.path

- if url.port != None and url.port != self.server.port:

- report_error(stream, 59, "Invalid port number")

- return

     if path == '':

         path = '/'

     hostname = url.hostname

- entrypoint = self.server.find_entrypoint(url.scheme, hostname, path)



- # Server name indication is required.

+ if url.port != None and url.port != self.server.port:

+ report_error(stream, 59, "Invalid port number")

+ return

     if not stream.get_servername():

+ # Server name indication is required.

         report_error(stream, 59, "Missing TLS server name indication")

         return

- if stream.get_servername().decode() != url.hostname:

+ if stream.get_servername().decode() != hostname:

         report_error(stream, 53, "Proxy request refused")

         return



- caches = [] if (url.scheme != 'gemini' or identity or len(url.query) > 0) \

- else self.server.caches

- from_cache = None

-

- # print(f'Request : {request}')

- # print(f'Cert : {cl_cert}')

-

- if entrypoint:

- # Check the caches first.

- for cache in caches:

- media, content = cache.try_load(hostname + path)

- if not media is None:

- response = 20, media, content

- from_cache = cache

- if hasattr(content, '__len__'):

- print('%d bytes from cache, %s' % (len(content), media))

- else:

- print('stream from cache,', media)

- break

-

- # Process the request normally if there is nothing cached.

- if not from_cache:

- try:

- response = entrypoint(Request(

- identity,

- remote_address=from_addr,

- scheme=url.scheme,

- hostname=hostname,

- path=path,

- query=url.query if '?' in request else None,

- content_token=req_token,

- content_mime=req_mime,

- content=data if len(data) else None

- ))

- except Exception as x:

- import traceback

- traceback.print_exception(x)

- report_error(stream, 40, 'Temporary failure')

- return

+ try:

+ request = Request(

+ identity,

+ remote_address=from_addr,

+ scheme=url.scheme,

+ hostname=hostname,

+ path=path,

+ query=url.query if '?' in request else None,

+ content_token=req_token,

+ content_mime=req_mime,

+ content=data if len(data) else None

+ )

+ response, from_cache = self.server.call_entrypoint(request)



         # Determine status code, meta line, and body content.

         if type(response) == tuple:

             if len(response) == 2:

- status = response[0]

- meta = response[1]

+ status, meta = response

                 response = ''

             else:

- status = response[0]

- meta = response[1]

- response = response[2]

+ status, meta, response = response

         else:

             status = 20

             meta = 'text/gemini; charset=utf-8'

@@ -528,7 +507,7 @@ class Worker(threading.Thread):

         # Save to cache.

         if not from_cache and status == 20 and \

                 (type(response_data) == bytes or type(response_data) == bytearray):

- for cache in caches:

+ for cache in self.server.caches:

                 if cache.save(hostname + path, meta, response_data):

                     break



@@ -536,8 +515,9 @@ class Worker(threading.Thread):

         if hasattr(response_data, 'close'):

             response_data.close()



- else:

- report_error(stream, 50, 'Permanent failure')

+ except GeminiError as error:

+ report_error(stream, error.status, str(error))

+ return





class Server:

@@ -693,7 +673,39 @@ class Server:

                 handler = path_pattern(path)

                 if handler:

                     return handler

- except:

+ except Exception as x:

+ print(x)

         return None



     return None

+

+ def call_entrypoint(self, request):

+ entrypoint = self.find_entrypoint(request.scheme, request.hostname, request.path)

+

+ caches = self.caches if (request.scheme == 'gemini' and

+ not request.identity and

+ not request.query) else []

+ from_cache = None

+

+ if entrypoint:

+ # Check the caches first.

+ for cache in caches:

+ media, content = cache.try_load(request.hostname + request.path)

+ if not media is None:

+ response = 20, media, content

+ if hasattr(content, '__len__'):

+ print('%d bytes from cache, %s' % (len(content), media))

+ else:

+ print('stream from cache,', media)

+ return response, cache

+

+ # Process the request normally if there is nothing cached.

+ if not from_cache:

+ try:

+ return entrypoint(request), None

+ except Exception as x:

+ import traceback

+ traceback.print_exception(x)

+ raise GeminiError(40, 'Temporary failure')

+

+ raise GeminiError(50, 'Permanent failure')

diff --git a/gmcapsule/modules/10_rewrite.py b/gmcapsule/modules/10_rewrite.py

new file mode 100644

index 0000000..584e7a9

--- /dev/null

+++ b/gmcapsule/modules/10_rewrite.py

@@ -0,0 +1,82 @@

+# Copyright (c) 2023 Jaakko Keränen <jaakko.keranen@iki.fi>

+# License: BSD-2-Clause

+

+"""Rewriter"""

+

+import re

+

+

+class PathRewriteHandler:

+ def __init__(self, capsule, rewritten_path):

+ self.capsule = capsule

+ self.rewritten_path = rewritten_path

+

+ def __call__(self, req):

+ old_path = req.path

+ req.path = self.rewritten_path

+

+ # Don't allow rewriting the same request too many times.

+ if hasattr(req, 'num_rewrites'):

+ req.num_rewrites += 1

+ else:

+ req.num_rewrites = 1

+ if req.num_rewrites == 100:

+ return 40, "Stuck in rewrite loop: " + req.url()

+

+ print("[rewrite]", old_path, "->", req.path)

+ return self.capsule.call_entrypoint(req)[0]

+

+

+class Responder:

+ def __init__(self, response):

+ self.response = response

+

+ def __call__(self, req):

+ return self.response

+

+

+class Rewriter:

+ def __init__(self, capsule, protocol, host, src_path, dst_path, status):

+ self.capsule = capsule

+ self.protocol = protocol

+ self.host = host

+ self.src_path = src_path

+ self.dst_path = dst_path

+ self.status = status

+

+ def __call__(self, path):

+ # If path matches a rewritten URL, return the handler object that calls the

+ # correct handler for the updated URL.

+ if self.dst_path:

+ new_path = self.src_path.sub(self.dst_path, path)

+ if new_path != path:

+ return PathRewriteHandler(self.capsule, new_path)

+

+ elif self.status:

+ m = self.src_path.match(path)

+ if m:

+ status = self.status

+ for i in range(self.src_path.groups + 1):

+ cap = m[i]

+ if cap:

+ status = status.replace(f'\{i}', cap)

+ code, meta = status.split()

+ print("[rewrite]", code, meta)

+ return Responder((int(code), meta))

+

+ return None

+

+

+def init(capsule):

+ cfg = capsule.config()

+ for section in cfg.prefixed_sections('rewrite.').values():

+ protocol = section.get('protocol', None)

+ host = section.get('host', cfg.hostnames()[0])

+ src_path = re.compile(section.get('path'))

+ dst_path = section.get('repl', None)

+ status = section.get('status', None)

+ for proto in [protocol] if protocol else ['gemini', 'titan']:

+ capsule.add(Rewriter(capsule, proto, host, src_path, dst_path, status),

+ None, # Rewriter will return a suitable handler callback.

+ host,

+ proto)

Proxy Information
Original URL
gemini://git.skyjake.fi/gmcapsule/gsorg-style/pcdiff/375360214ce32bb545c0fd8beef676e90d36980a
Status Code
Success (20)
Meta
text/plain
Capsule Response Time
31.782845 milliseconds
Gemini-to-HTML Time
5.894459 milliseconds

This content has been proxied by September (ba2dc).