From 5e31f66313e2dc15fb9e75395504feb0992c3feb Mon Sep 17 00:00:00 2001

From: =?UTF-8?q?Jaakko=20Kera=CC=88nen?= <jaakko.keranen@iki.fi>

Date: Mon, 27 Dec 2021 10:00:17 +0200

Subject: [PATCH 1/1] Reserved characters in URLs

Making URL encoding a little less convoluted. Now when sending out a request, the URL is fully encoded except for reserved characters. In the internal representation, non-ASCII characters are in decoded form (i.e., IRI).

This means that if the user enters a URL in the input field manually, its non-ASCII characters will be percent encoded as well. However, in this case the user is expected to manually escape all reserved characters because the input field can't tell the difference between what is intended to be a reserved separator and what isn't. For example, a server might expect &-separated fields, and if the user enters such fields manually in the URL field, they shouldn't be converted to %26.

When forming a query URL in the input dialog, user-entered text is fully percent-encoded because in that case the input is just a generic text string.

IssueID #410


src/app.c | 6 ------

src/gmrequest.c | 6 ++++--

src/gmutil.c | 30 ++++++++++++++++++++++++++----

src/gmutil.h | 2 ++

src/ui/inputwidget.c | 10 ++++++++--

5 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/src/app.c b/src/app.c

index 3b4c24f0..73cc35ee 100644

--- a/src/app.c

+++ b/src/app.c

@@ -2791,12 +2791,6 @@ iBool handleCommand_App(const char *cmd) {

     setRedirectCount_DocumentWidget(doc, redirectCount);

     setOrigin_DocumentWidget(doc, origin);

     showCollapsed_Widget(findWidget_App("document.progress"), iFalse);

     setUrlFlags_DocumentWidget(doc, url,

        isHistory ? useCachedContentIfAvailable_DocumentWidgetSetUrlFlag : 0);

     /* Optionally, jump to a text in the document. This will only work if the document

diff --git a/src/gmrequest.c b/src/gmrequest.c

index a9c5919d..c23e8499 100644

--- a/src/gmrequest.c

+++ b/src/gmrequest.c

@@ -585,8 +585,10 @@ void setUrl_GmRequest(iGmRequest *d, const iString *url) {

 /* TODO: Gemini spec allows UTF-8 encoded URLs, but still need to percent-encode non-ASCII

    characters? Could be a server-side issue, e.g., if they're using a URL parser meant for

    the web. */

 d->identity = identityForUrl_GmCerts(d->certs, &d->url);

}

diff --git a/src/gmutil.c b/src/gmutil.c

index 79462e41..98e4d4d6 100644

--- a/src/gmutil.c

+++ b/src/gmutil.c

@@ -330,6 +330,28 @@ void urlEncodePath_String(iString *d) {

 delete_String(encoded);

}

+void urlEncodeQuery_String(iString *d) {

+}

iBool isKnownScheme_Rangecc(iRangecc scheme) {

 if (isKnownUrlScheme_Rangecc(scheme)) {

     return iTrue;

@@ -667,20 +689,20 @@ const iString *canonicalUrl_String(const iString *d) {

 iString *canon = NULL;

 iUrl parts;

 init_Url(&parts, d);

 if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) ||

     iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) {

     /* This is done separately to avoid the copy if %3A is not present; it's rare. */

     canon = copy_String(d);

     urlDecodePath_String(canon);

     if (dec) {

         set_String(canon, dec);

         delete_String(dec);

     }

 }

 else {

 }

 /* `canon` may now be NULL if nothing was decoded. */

 if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos ||

@@ -689,7 +711,7 @@ const iString *canonicalUrl_String(const iString *d) {

         canon = copy_String(d);

     }

     urlEncodeSpaces_String(canon);

 return canon ? collect_String(canon) : d;

}

diff --git a/src/gmutil.h b/src/gmutil.h

index 6d337eeb..15bb7b2e 100644

--- a/src/gmutil.h

+++ b/src/gmutil.h

@@ -100,6 +100,7 @@ iRegExp * newGemtextLink_RegExp (void);

#define GEMINI_DEFAULT_PORT ((uint16_t) 1965)

#define GEMINI_DEFAULT_PORT_CSTR "1965"

+#define URL_RESERVED_CHARS ":/?#[]@!$&'()*+,;=" /* RFC 3986 */

struct Impl_Url {

 iRangecc scheme;

@@ -131,6 +132,7 @@ const iString * urlFragmentStripped_String(const iString *);

const iString * urlQueryStripped_String (const iString *);

void urlDecodePath_String (iString *);

void urlEncodePath_String (iString *);

+void urlEncodeQuery_String (iString *);

iString * makeFileUrl_String (const iString *localFilePath);

const char * makeFileUrl_CStr (const char *localFilePath);

iString * localFilePathFromUrl_String(const iString *);

diff --git a/src/ui/inputwidget.c b/src/ui/inputwidget.c

index b94e0c27..24983d69 100644

--- a/src/ui/inputwidget.c

+++ b/src/ui/inputwidget.c

@@ -1100,9 +1100,15 @@ static void updateBuffered_InputWidget_(iInputWidget *d) {

void setText_InputWidget(iInputWidget *d, const iString *text) {

 if (!d) return;

 if (d->inFlags & isUrl_InputWidgetFlag) {

         iString *enc = collect_String(copy_String(text));

         /* Prevent address bar spoofing (mentioned as IDN homograph attack in

            https://github.com/skyjake/lagrange/issues/73) */

         punyEncodeUrlHost_String(enc);

--

2.25.1

Proxy Information
Original URL
gemini://git.skyjake.fi/lagrange/work%2Fv1.10/patch/5e31f66313e2dc15fb9e75395504feb0992c3feb.patch
Status Code
Success (20)
Meta
text/plain
Capsule Response Time
142.819076 milliseconds
Gemini-to-HTML Time
2.765023 milliseconds

This content has been proxied by September (ba2dc).