GmCapsule [gsorg-style]

Process Gemini/Titan via a scheme handler; updated docs, change log

=> 5ab0ba40b8019dff5ce05faaf923eb1a6a160241

diff --git a/README.md b/README.md
index be0fcb5..0b1540a 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Create the following service file and save it as _~/.config/systemd/user/gmcapsu
     [Service]
     Type=simple
     ExecStart=/gmcapsuled
+    ExecReload=/bin/kill -HUP $MAINPID
     Restart=always
     Environment="PYTHONUNBUFFERED=1"
     StandardOutput=syslog
@@ -46,6 +47,12 @@ The log can be viewed via journalctl (or syslog):
 
 ## Change log
 
+### v0.5
+
+* Extension modules can register new protocols in addition to the built-in Gemini and Titan.
+* SIGHUP causes the configuration file to be reloaded and workers to be restarted. The listening socket remains open, so the socket and TLS parameters cannot be changed.
+* API change: Extension modules get initialized separately in each worker thread. Instead of a `Capsule`, the extension module `init` method is passed a `WorkerContext`. `Capsule` is no longer available as global state.
+
 ### v0.4
 
 * Added built-in module "rewrite" that matches regular expressions against the request path and can rewrite the path or return a custom status for redirection, "Gone" messages, or other exceptional situations.
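
A rough sketch of what the v0.5 entries above mean for extension authors. The module and its "echo" scheme below are hypothetical; only init(context) and WorkerContext.add_protocol() come from this commit.

```
# Hypothetical v0.5 extension module (illustrative only).

def init(context):
    # Since v0.5, init() is passed a gmcapsule.WorkerContext and runs once
    # in every worker thread instead of once per server.
    context.add_protocol(
        'echo',   # made-up URL scheme
        # The handler receives the incoming request data and returns the raw
        # response bytes that the worker sends back to the client.
        lambda req: b'20 text/plain\r\n' + req.request.encode('utf-8') + b'\r\n')
```
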
diff --git a/docs/api.rst b/docs/api.rst
index 41024fe..75db2ed 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -12,6 +12,7 @@ These classes and functions are available to extension modules by importing
    Cache
    gemini.Request
    gemini.Identity
+   WorkerContext
 
 
 Classes
@@ -29,7 +30,7 @@ Capsule
     :members:
 
 Cache
-------
+-----
 .. autoclass:: gmcapsule.Cache
     :members:
 
@@ -43,6 +44,11 @@ Identity
 .. autoclass:: gmcapsule.gemini.Identity
     :members:
 
+WorkerContext
+-------------
+.. autoclass:: gmcapsule.WorkerContext
+    :members:
+
 
 Functions
 *********
diff --git a/gmcapsule/__init__.py b/gmcapsule/__init__.py
index 7a98b98..ebbeeb5 100644
--- a/gmcapsule/__init__.py
+++ b/gmcapsule/__init__.py
@@ -419,16 +419,16 @@ Initialization
 
 Each extension module is required to have an initialization function:
 
-.. py:function:: extmod.init(capsule)
+.. py:function:: extmod.init(context)
 
     Initialize the extension module.
 
     This is called once immediately after the extension has been loaded.
 
-    :param capsule: Server instance. The extension can use this to access
+    :param context: Worker context. The extension can use this to access
         configuration parameters, install caches, and register entry
-        points.
-    :type capsule: gmcapsule.Capsule
+        points and custom scheme handlers.
+    :type context: gmcapsule.WorkerContext
 
 
 Requests
@@ -481,13 +481,13 @@ import shlex
 import subprocess
 from pathlib import Path
 
-from .gemini import Server, Cache
+from .gemini import Server, Cache, WorkerContext
 from .markdown import to_gemtext as markdown_to_gemtext
 
 
 __version__ = '0.5.0'
 __all__ = [
-    'Config', 'Cache',
+    'Config', 'Cache', 'WorkerContext',
     'get_mime_type', 'markdown_to_gemtext'
 ]
 
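The docstring change above is the core of the API break: an entry point module keeps the same shape, only the object handed to init() is now a WorkerContext. Below is a minimal sketch under the new signature; serve_hello and the /hello path are made up, while config() and add() are the WorkerContext methods visible in the gemini.py diff that follows.

```
# Illustrative entry point module using the v0.5 init(context) signature.

def serve_hello(req):
    # Entry points receive a gmcapsule.gemini.Request; returning a plain
    # string is sent as status 20 with "text/gemini; charset=utf-8".
    return '# Hello\n'

def init(context):
    cfg = context.config()              # configuration is still reachable (unused here)
    context.add('/hello', serve_hello)  # register a Gemini entry point
```
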
diff --git a/gmcapsule/gemini.py b/gmcapsule/gemini.py
index a7026a0..4744387 100644
--- a/gmcapsule/gemini.py
+++ b/gmcapsule/gemini.py
@@ -304,6 +304,7 @@ class WorkerContext:
             for hostname in self.hostnames:
                 self.entrypoints[proto][hostname] = []
         self.caches = []
+        self.protocols = {}
         self.is_quiet = False
 
     def config(self):
@@ -344,6 +345,18 @@ class WorkerContext:
         """
         self.caches.append(cache)
 
+    def add_protocol(self, scheme, handler):
+        """
+        Registers a new protocol handler.
+
+        Args:
+            scheme (str): URL scheme of the protocol.
+            handler (callable): Handler to be called when a request with the
+                specified scheme is received. The handler must return the
+                response to be sent to the client (bytes).
+        """
+        self.protocols[scheme] = handler
+
     def add(self, path, entrypoint, hostname=None, protocol='gemini'):
         """
         Register a URL entry point.
@@ -470,16 +483,142 @@ class WorkerContext:
         raise GeminiError(50, 'Permanent failure')
 
 
+class RequestData:
+    """Encapsulates data about an incoming request, before it has been fully parsed."""
+
+    def __init__(self, worker, stream, buffered_data, from_addr, identity, request):
+        self.worker = worker
+        self.stream = stream
+        self.buffered_data = buffered_data
+        self.from_addr = from_addr
+        self.identity = identity
+        self.request = request
+
+
+def handle_gemini_or_titan_request(request_data):
+    worker = request_data.worker
+    stream = request_data.stream
+    data = request_data.buffered_data
+    from_addr = request_data.from_addr
+    identity = request_data.identity
+    request = request_data.request
+    expected_size = 0
+    req_token = None
+    req_mime = None
+
+    if request.startswith('titan:'):
+        if identity is None and worker.cfg.require_upload_identity():
+            report_error(stream, 60, "Client certificate required for upload")
+            return
+        # Read the rest of the data.
+        parms = request.split(';')
+        request = parms[0]
+        for p in parms:
+            if p.startswith('size='):
+                expected_size = int(p[5:])
+            elif p.startswith('token='):
+                req_token = p[6:]
+            elif p.startswith('mime='):
+                req_mime = p[5:]
+        worker.log(f'Receiving Titan content: {expected_size}')
+        max_upload_size = worker.cfg.max_upload_size()
+        if expected_size > max_upload_size and max_upload_size > 0:
+            report_error(stream, 59, "Maximum content length exceeded")
+            return
+        while len(data) < expected_size:
+            incoming = safe_recv(stream, 65536)
+            if len(incoming) == 0:
+                break
+            data += incoming
+        if len(data) != expected_size:
+            report_error(stream, 59, "Invalid content length")
+            return
+    else:
+        # No Payload in Gemini.
+        if len(data):
+            report_error(stream, 59, "Bad request")
+            return
+
+    url = urlparse(request)
+    path = url.path
+    if path == '':
+        path = '/'
+    hostname = url.hostname
+
+    if url.port != None and url.port != worker.port:
+        report_error(stream, 59, "Invalid port number")
+        return
+    if not stream.get_servername():
+        # Server name indication is required.
+        report_error(stream, 59, "Missing TLS server name indication")
+        return
+    if stream.get_servername().decode() != hostname:
+        report_error(stream, 53, "Proxy request refused")
+        return
+
+    try:
+        request = Request(
+            identity,
+            remote_address=from_addr,
+            scheme=url.scheme,
+            hostname=hostname,
+            path=path,
+            query=url.query if '?' in request else None,
+            content_token=req_token,
+            content_mime=req_mime,
+            content=data if len(data) else None
+        )
+        response, from_cache = worker.context.call_entrypoint(request)
+
+        # Determine status code, meta line, and body content.
+        if type(response) == tuple:
+            if len(response) == 2:
+                status, meta = response
+                response = ''
+            else:
+                status, meta, response = response
+        else:
+            status = 20
+            meta = 'text/gemini; charset=utf-8'
+
+        if response == None:
+            response_data = b''
+        elif type(response) == str:
+            response_data = response.encode('utf-8')
+        else:
+            response_data = response
+
+        safe_sendall(stream, f'{status} {meta}\r\n'.encode('utf-8'))
+        safe_sendall(stream, response_data)
+
+        # Save to cache.
+        if not from_cache and status == 20 and \
+                (type(response_data) == bytes or type(response_data) == bytearray):
+            for cache in worker.context.caches:
+                if cache.save(hostname + path, meta, response_data):
+                    break
+
+        # Close handles.
+        if hasattr(response_data, 'close'):
+            response_data.close()
+
+    except GeminiError as error:
+        report_error(stream, error.status, str(error))
+        return
+
+
 class Worker(threading.Thread):
     """Thread that handles incoming requests from clients."""
 
     def __init__(self, id, cfg, work_queue, shutdown_event):
-        #super().__init__(target=Worker._run, args=(self,)) # mp
+        #super().__init__(target=Worker._run, args=(self,))     # multiprocessing
         super().__init__()
         self.id = id
         self.cfg = cfg
         self.port = cfg.port()
         self.context = WorkerContext(self.cfg, shutdown_event)
+        self.context.add_protocol('gemini', handle_gemini_or_titan_request)
+        self.context.add_protocol('titan', handle_gemini_or_titan_request)
         self.context.set_quiet(id > 0)
         self.jobs = work_queue
 
@@ -520,9 +659,6 @@ class Worker(threading.Thread):
         MAX_LEN = 1024
         MAX_RECV = MAX_LEN + 2  # includes terminator "\r\n"
         request = None
-        expected_size = 0
-        req_token = None
-        req_mime = None
         incoming = safe_recv(stream, MAX_RECV)
 
         try:
@@ -547,117 +683,26 @@ class Worker(threading.Thread):
             return
 
         if not request or len(data) > MAX_RECV:
-            report_error(stream, 59, "Invalid request")
-            return
-        if not (request.startswith('gemini:') or request.startswith('titan:')):
-            report_error(stream, 59, "Unsupported protocol")
+            report_error(stream, 59, "Bad request")
             return
 
         cl_cert = stream.get_peer_certificate()
         identity = Identity(cl_cert) if cl_cert else None
 
-        if request.startswith('titan:'):
-            if identity is None and self.cfg.require_upload_identity():
-                report_error(stream, 60, "Client certificate required for upload")
-                return
-
-            # Read the rest of the data.
-            parms = request.split(';')
-            request = parms[0]
-            for p in parms:
-                if p.startswith('size='):
-                    expected_size = int(p[5:])
-                elif p.startswith('token='):
-                    req_token = p[6:]
-                elif p.startswith('mime='):
-                    req_mime = p[5:]
-            self.log(f'Receiving Titan content: {expected_size}')
-            max_upload_size = self.cfg.max_upload_size()
-            if expected_size > max_upload_size and max_upload_size > 0:
-                report_error(stream, 59, "Maximum content length exceeded")
-                return
-            while len(data) < expected_size:
-                incoming = safe_recv(stream, 65536)
-                if len(incoming) == 0:
-                    break
-                data += incoming
-            if len(data) != expected_size:
-                report_error(stream, 59, "Invalid content length")
+        for scheme, handler in self.context.protocols.items():
+            if request.startswith(scheme + ':'):
+                self.log(request)
+                response_data = handler(RequestData(self,
+                                                    stream, data, from_addr,
+                                                    identity, request))
+                if not response_data is None:
+                    safe_sendall(stream, response_data)
+                    # Close handles.
+                    if hasattr(response_data, 'close'):
+                        response_data.close()
                 return
-        else:
-            # No Payload in Gemini.
-            if len(data):
-                report_error(stream, 59, "Gemini disallows request content")
-                return
-
-        self.log(request)
-
-        url = urlparse(request)
-        path = url.path
-        if path == '':
-            path = '/'
-        hostname = url.hostname
 
-        if url.port != None and url.port != self.port:
-            report_error(stream, 59, "Invalid port number")
-            return
-        if not stream.get_servername():
-            # Server name indication is required.
-            report_error(stream, 59, "Missing TLS server name indication")
-            return
-        if stream.get_servername().decode() != hostname:
-            report_error(stream, 53, "Proxy request refused")
-            return
-
-        try:
-            request = Request(
-                identity,
-                remote_address=from_addr,
-                scheme=url.scheme,
-                hostname=hostname,
-                path=path,
-                query=url.query if '?' in request else None,
-                content_token=req_token,
-                content_mime=req_mime,
-                content=data if len(data) else None
-            )
-            response, from_cache = self.context.call_entrypoint(request)
-
-            # Determine status code, meta line, and body content.
-            if type(response) == tuple:
-                if len(response) == 2:
-                    status, meta = response
-                    response = ''
-                else:
-                    status, meta, response = response
-            else:
-                status = 20
-                meta = 'text/gemini; charset=utf-8'
-
-            if response == None:
-                response_data = b''
-            elif type(response) == str:
-                response_data = response.encode('utf-8')
-            else:
-                response_data = response
-
-            safe_sendall(stream, f'{status} {meta}\r\n'.encode('utf-8'))
-            safe_sendall(stream, response_data)
-
-            # Save to cache.
-            if not from_cache and status == 20 and \
-                    (type(response_data) == bytes or type(response_data) == bytearray):
-                for cache in self.context.caches:
-                    if cache.save(hostname + path, meta, response_data):
-                        break
-
-            # Close handles.
-            if hasattr(response_data, 'close'):
-                response_data.close()
-
-        except GeminiError as error:
-            report_error(stream, error.status, str(error))
-            return
+        report_error(stream, 59, "Unsupported protocol")
 
 
 _server_instance = None
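
Taken together, the RequestData class and the new dispatch loop define the contract for a registered handler. The sketch below uses a made-up "status" scheme; the RequestData fields, add_protocol(), and the return convention are taken from the diff above, everything else is hypothetical.

```
# Hypothetical handler illustrating the dispatch contract above.

def handle_status(req):
    # req is a RequestData: req.worker, req.stream, req.buffered_data,
    # req.from_addr, req.identity, and the raw req.request line.
    if req.identity is None:
        return b'60 Client certificate required\r\n'
    # Returning bytes makes the worker send them with safe_sendall().
    # Returning None means the handler already wrote to req.stream itself,
    # as handle_gemini_or_titan_request does.
    return b'20 text/plain\r\nworkers alive\r\n'

def init(context):
    context.add_protocol('status', handle_status)
```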