From 5ab0ba40b8019dff5ce05faaf923eb1a6a160241 Mon Sep 17 00:00:00 2001
From: Jaakko Keränen <jaakko.keranen@iki.fi>
Date: Fri, 16 Jun 2023 16:02:34 +0300
Subject: [PATCH 1/1] Process Gemini/Titan via a scheme handler; updated docs,
change log
IssueID #2
---
 README.md             |   7 ++
 docs/api.rst          |   8 +-
 gmcapsule/__init__.py |  12 +-
 gmcapsule/gemini.py   | 261 +++++++++++++++++++++++++-----------------
 4 files changed, 173 insertions(+), 115 deletions(-)
diff --git a/README.md b/README.md
index be0fcb5..0b1540a 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Create the following service file and save it as _~/.config/systemd/user/gmcapsu
[Service]
Type=simple
ExecStart=<YOUR-INSTALL-PATH>/gmcapsuled
+Restart=always
Environment="PYTHONUNBUFFERED=1"
StandardOutput=syslog
@@ -46,6 +47,12 @@ The log can be viewed via journalctl (or syslog):
+### v0.5
+* Extension modules can register new protocols in addition to the built-in Gemini and Titan.
+* SIGHUP causes the configuration file to be reloaded and workers to be restarted. The listening socket remains open, so the socket and TLS parameters cannot be changed.
+* API change: Extension modules get initialized separately in each worker thread. Instead of a `Capsule`, the extension module `init` method is passed a `WorkerContext`. `Capsule` is no longer available as global state.
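
For extension authors the migration is mostly mechanical. A minimal sketch of the new shape (the `serve_page` entry point is illustrative, and the commented-out v0.4 form assumes the old registration call had the same signature):

```python
# Before (v0.4): modules received the shared Capsule instance.
# def init(capsule):
#     capsule.add('/page', serve_page)

# After (v0.5): init() runs once in every worker thread and receives
# that worker's WorkerContext; registrations are worker-local.
def serve_page(req):
    # A plain str is sent as status 20, text/gemini (see gemini.py below).
    return '# Hello\n'

def init(context):
    context.add('/page', serve_page)
```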
diff --git a/docs/api.rst b/docs/api.rst
index 41024fe..75db2ed 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -12,6 +12,7 @@ These classes and functions are available to extension modules by importing
    Cache
    gemini.Request
    gemini.Identity
+   WorkerContext
Classes
@@ -29,7 +30,7 @@ Capsule
:members:
Cache
+-----
.. autoclass:: gmcapsule.Cache
:members:
@@ -43,6 +44,11 @@ Identity
.. autoclass:: gmcapsule.gemini.Identity
:members:
+WorkerContext
+-------------
+
+.. autoclass:: gmcapsule.WorkerContext
+   :members:
Functions
diff --git a/gmcapsule/__init__.py b/gmcapsule/__init__.py
index 7a98b98..ebbeeb5 100644
--- a/gmcapsule/__init__.py
+++ b/gmcapsule/__init__.py
@@ -419,16 +419,16 @@ Initialization
Each extension module is required to have an initialization function:
-.. py:function:: extmod.init(capsule)
+.. py:function:: extmod.init(context)
Initialize the extension module.
This is called once immediately after the extension has been loaded.
     configuration parameters, install caches, and register entry
-    points.
+    points and custom scheme handlers.
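
Taken together, the docstring above implies an `init` hook along these lines (a sketch: the `echo` scheme and both handler bodies are hypothetical, while `config()`, `add()`, and `add_protocol()` are the `WorkerContext` methods shown in this patch):

```python
def handle_status(req):
    # An entry point may return a (status, meta, body) tuple.
    return 20, 'text/plain', b'OK\n'

def handle_echo(req_data):
    # A scheme handler returns the complete response as bytes.
    return ('20 text/plain\r\n' + req_data.request + '\n').encode('utf-8')

def init(context):
    cfg = context.config()                     # read configuration parameters
    context.add('/status', handle_status)      # register an entry point
    context.add_protocol('echo', handle_echo)  # register a custom scheme
```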
Requests
@@ -481,13 +481,13 @@ import shlex
import subprocess
from pathlib import Path
-from .gemini import Server, Cache
+from .gemini import Server, Cache, WorkerContext
from .markdown import to_gemtext as markdown_to_gemtext
__version__ = '0.5.0'
__all__ = [
'get_mime_type', 'markdown_to_gemtext'
]
diff --git a/gmcapsule/gemini.py b/gmcapsule/gemini.py
index a7026a0..4744387 100644
--- a/gmcapsule/gemini.py
+++ b/gmcapsule/gemini.py
@@ -304,6 +304,7 @@ class WorkerContext:
             for hostname in self.hostnames:
                 self.entrypoints[proto][hostname] = []
         self.caches = []
+        self.protocols = {}
         self.is_quiet = False
def config(self):
@@ -344,6 +345,18 @@ class WorkerContext:
"""
self.caches.append(cache)
"""
Registers a new protocol handler.
Args:
scheme (str): URL scheme of the protocol.
handler (callable): Handler to be called when a request with the
specified scheme is received. The handler must return the
response to be sent to the client (bytes).
"""
self.protocols[scheme] = handler
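
Note the contract implied here and by the dispatch loop later in this patch: a handler receives a `RequestData` and either returns the complete response as bytes, or writes the response to `request_data.stream` itself and returns `None`, as the built-in Gemini/Titan handler does. A hypothetical example (assuming `from_addr` is the usual `(host, port)` pair):

```python
def handle_hello(req_data):
    # RequestData bundles: worker, stream, buffered_data, from_addr,
    # identity, and the raw request line.
    client_host = req_data.from_addr[0]
    return f'20 text/plain\r\nhello, {client_host}\n'.encode('utf-8')

# Inside an extension module's init(context):
#     context.add_protocol('hello', handle_hello)
```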
def add(self, path, entrypoint, hostname=None, protocol='gemini'):
"""
Register a URL entry point.
@@ -470,16 +483,142 @@ class WorkerContext:
raise GeminiError(50, 'Permanent failure')
+class RequestData:
+    def __init__(self, worker, stream, buffered_data, from_addr, identity,
+                 request):
+        self.worker = worker
+        self.stream = stream
+        self.buffered_data = buffered_data
+        self.from_addr = from_addr
+        self.identity = identity
+        self.request = request
+
+
+def handle_gemini_or_titan_request(request_data):
+    worker = request_data.worker
+    stream = request_data.stream
+    data = request_data.buffered_data
+    from_addr = request_data.from_addr
+    identity = request_data.identity
+    request = request_data.request
+    expected_size = 0
+    req_token = None
+    req_mime = None
+    if request.startswith('titan:'):
+        if identity is None and worker.cfg.require_upload_identity():
+            report_error(stream, 60, "Client certificate required for upload")
+            return
+        # Read the rest of the data.
+        parms = request.split(';')
+        request = parms[0]
+        for p in parms:
+            if p.startswith('size='):
+                expected_size = int(p[5:])
+            elif p.startswith('token='):
+                req_token = p[6:]
+            elif p.startswith('mime='):
+                req_mime = p[5:]
+        worker.log(f'Receiving Titan content: {expected_size}')
+        max_upload_size = worker.cfg.max_upload_size()
+        if expected_size > max_upload_size and max_upload_size > 0:
+            report_error(stream, 59, "Maximum content length exceeded")
+            return
+        while len(data) < expected_size:
+            incoming = safe_recv(stream, 65536)
+            if len(incoming) == 0:
+                break
+            data += incoming
+        if len(data) != expected_size:
+            report_error(stream, 59, "Invalid content length")
+            return
+    else:
+        # No Payload in Gemini.
+        if len(data):
+            report_error(stream, 59, "Bad request")
+            return
+    url = urlparse(request)
+    path = url.path
+    if path == '':
+        path = '/'
+    hostname = url.hostname
+    if url.port != None and url.port != worker.port:
+        report_error(stream, 59, "Invalid port number")
+        return
+    if not stream.get_servername():
+        # Server name indication is required.
+        report_error(stream, 59, "Missing TLS server name indication")
+        return
+    if stream.get_servername().decode() != hostname:
+        report_error(stream, 53, "Proxy request refused")
+        return
+    try:
+        request = Request(
+            identity,
+            remote_address=from_addr,
+            scheme=url.scheme,
+            hostname=hostname,
+            path=path,
+            query=url.query if '?' in request else None,
+            content_token=req_token,
+            content_mime=req_mime,
+            content=data if len(data) else None
+        )
+        response, from_cache = worker.context.call_entrypoint(request)
+        # Determine status code, meta line, and body content.
+        if type(response) == tuple:
+            if len(response) == 2:
+                status, meta = response
+                response = ''
+            else:
+                status, meta, response = response
+        else:
+            status = 20
+            meta = 'text/gemini; charset=utf-8'
+        if response == None:
+            response_data = b''
+        elif type(response) == str:
+            response_data = response.encode('utf-8')
+        else:
+            response_data = response
+        safe_sendall(stream, f'{status} {meta}\r\n'.encode('utf-8'))
+        safe_sendall(stream, response_data)
+        # Save to cache.
+        if not from_cache and status == 20 and \
+                (type(response_data) == bytes or type(response_data) == bytearray):
+            for cache in worker.context.caches:
+                if cache.save(hostname + path, meta, response_data):
+                    break
+        # Close handles.
+        if hasattr(response_data, 'close'):
+            response_data.close()
+    except GeminiError as error:
+        report_error(stream, error.status, str(error))
+        return
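
The unpacking logic above defines the return shapes an entry point may use; schematically (hypothetical entry points):

```python
def ep_text(req):
    return '# Hello\n'                 # str -> 20, text/gemini; charset=utf-8

def ep_input(req):
    return 10, 'Enter your name'       # (status, meta) -> empty body

def ep_data(req):
    return 20, 'application/zip', b'PK\x03\x04'  # (status, meta, body bytes)
```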
class Worker(threading.Thread):
"""Thread that handles incoming requests from clients."""
def __init__(self, id, cfg, work_queue, shutdown_event):
-        #super().__init__(target=Worker._run, args=(self,)) # mp
+        #super().__init__(target=Worker._run, args=(self,)) # multiprocessing
super().__init__()
self.id = id
self.cfg = cfg
self.port = cfg.port()
self.context = WorkerContext(self.cfg, shutdown_event)
+        self.context.add_protocol('gemini', handle_gemini_or_titan_request)
+        self.context.add_protocol('titan', handle_gemini_or_titan_request)
self.context.set_quiet(id > 0)
self.jobs = work_queue
@@ -520,9 +659,6 @@ class Worker(threading.Thread):
MAX_LEN = 1024
MAX_RECV = MAX_LEN + 2 # includes terminator "\r\n"
request = None
-        expected_size = 0
-        req_token = None
-        req_mime = None
incoming = safe_recv(stream, MAX_RECV)
try:
@@ -547,117 +683,26 @@ class Worker(threading.Thread):
return
if not request or len(data) > MAX_RECV:
report_error(stream, 59, "Invalid request")
return
-        if not (request.startswith('gemini:') or request.startswith('titan:')):
-            report_error(stream, 59, "Unsupported protocol")
-            return
cl_cert = stream.get_peer_certificate()
identity = Identity(cl_cert) if cl_cert else None
-        if request.startswith('titan:'):
-            if identity is None and self.cfg.require_upload_identity():
-                report_error(stream, 60, "Client certificate required for upload")
-                return
-            # Read the rest of the data.
-            parms = request.split(';')
-            request = parms[0]
-            for p in parms:
-                if p.startswith('size='):
-                    expected_size = int(p[5:])
-                elif p.startswith('token='):
-                    req_token = p[6:]
-                elif p.startswith('mime='):
-                    req_mime = p[5:]
-            self.log(f'Receiving Titan content: {expected_size}')
-            max_upload_size = self.cfg.max_upload_size()
-            if expected_size > max_upload_size and max_upload_size > 0:
-                report_error(stream, 59, "Maximum content length exceeded")
-                return
-            while len(data) < expected_size:
-                incoming = safe_recv(stream, 65536)
-                if len(incoming) == 0:
-                    break
-                data += incoming
-            if len(data) != expected_size:
-                report_error(stream, 59, "Invalid content length")
-                return
+        for scheme, handler in self.context.protocols.items():
+            if request.startswith(scheme + ':'):
+                self.log(request)
+                response_data = handler(RequestData(self,
+                                                    stream, data, from_addr,
+                                                    identity, request))
+                if not response_data is None:
+                    safe_sendall(stream, response_data)
+                    # Close handles.
+                    if hasattr(response_data, 'close'):
+                        response_data.close()
+                return
-        else:
-            # No Payload in Gemini.
-            if len(data):
-                report_error(stream, 59, "Gemini disallows request content")
-                return
-        self.log(request)
-        url = urlparse(request)
-        path = url.path
-        if path == '':
-            path = '/'
-        hostname = url.hostname
-        if url.port != None and url.port != self.port:
-            report_error(stream, 59, "Invalid port number")
-            return
-        if not stream.get_servername():
-            # Server name indication is required.
-            report_error(stream, 59, "Missing TLS server name indication")
-            return
-        if stream.get_servername().decode() != hostname:
-            report_error(stream, 53, "Proxy request refused")
-            return
-        try:
-            request = Request(
-                identity,
-                remote_address=from_addr,
-                scheme=url.scheme,
-                hostname=hostname,
-                path=path,
-                query=url.query if '?' in request else None,
-                content_token=req_token,
-                content_mime=req_mime,
-                content=data if len(data) else None
-            )
-            response, from_cache = self.context.call_entrypoint(request)
-            # Determine status code, meta line, and body content.
-            if type(response) == tuple:
-                if len(response) == 2:
-                    status, meta = response
-                    response = ''
-                else:
-                    status, meta, response = response
-            else:
-                status = 20
-                meta = 'text/gemini; charset=utf-8'
-            if response == None:
-                response_data = b''
-            elif type(response) == str:
-                response_data = response.encode('utf-8')
-            else:
-                response_data = response
-            safe_sendall(stream, f'{status} {meta}\r\n'.encode('utf-8'))
-            safe_sendall(stream, response_data)
-            # Save to cache.
-            if not from_cache and status == 20 and \
-                    (type(response_data) == bytes or type(response_data) == bytearray):
-                for cache in self.context.caches:
-                    if cache.save(hostname + path, meta, response_data):
-                        break
-            # Close handles.
-            if hasattr(response_data, 'close'):
-                response_data.close()
-        except GeminiError as error:
-            report_error(stream, error.status, str(error))
-            return
+        report_error(stream, 59, "Unsupported protocol")
_server_instance = None
--
2.25.1