Lagrange [work/v1.10]

Reserved characters in URLs

=> 5e31f66313e2dc15fb9e75395504feb0992c3feb

diff --git a/src/app.c b/src/app.c
index 3b4c24f0..73cc35ee 100644
--- a/src/app.c
+++ b/src/app.c
@@ -2791,12 +2791,6 @@ iBool handleCommand_App(const char *cmd) {
         setRedirectCount_DocumentWidget(doc, redirectCount);
         setOrigin_DocumentWidget(doc, origin);
         showCollapsed_Widget(findWidget_App("document.progress"), iFalse);
-        if (prefs_App()->decodeUserVisibleURLs) {
-            urlDecodePath_String(url);
-        }
-        else {
-            urlEncodePath_String(url);
-        }            
         setUrlFlags_DocumentWidget(doc, url,
            isHistory ? useCachedContentIfAvailable_DocumentWidgetSetUrlFlag : 0);
         /* Optionally, jump to a text in the document. This will only work if the document
diff --git a/src/gmrequest.c b/src/gmrequest.c
index a9c5919d..c23e8499 100644
--- a/src/gmrequest.c
+++ b/src/gmrequest.c
@@ -585,8 +585,10 @@ void setUrl_GmRequest(iGmRequest *d, const iString *url) {
     /* TODO: Gemini spec allows UTF-8 encoded URLs, but still need to percent-encode non-ASCII
        characters? Could be a server-side issue, e.g., if they're using a URL parser meant for
        the web. */
-    urlEncodePath_String(&d->url);
-    urlEncodeSpaces_String(&d->url);
+    /* Encode everything except already-percent encoded characters. */
+    iString *enc = urlEncodeExclude_String(&d->url, "%" URL_RESERVED_CHARS);
+    set_String(&d->url, enc);
+    delete_String(enc);
     d->identity = identityForUrl_GmCerts(d->certs, &d->url);
 }
 
diff --git a/src/gmutil.c b/src/gmutil.c
index 79462e41..98e4d4d6 100644
--- a/src/gmutil.c
+++ b/src/gmutil.c
@@ -330,6 +330,28 @@ void urlEncodePath_String(iString *d) {
     delete_String(encoded);
 }
 
+void urlEncodeQuery_String(iString *d) { /* URL-encodes the query part of `d` in place; the text before and after the query is copied verbatim. */
+    iUrl url;
+    init_Url(&url, d);
+    if (isEmpty_Range(&url.query)) {
+        return; /* no query component: `d` is left untouched */
+    }
+    iString encoded;
+    init_String(&encoded);
+    appendRange_String(&encoded, (iRangecc){ constBegin_String(d), url.query.start }); /* everything that precedes the query */
+    iString query;
+    url.query.start++; /* omit the question mark */
+    initRange_String(&query, url.query);
+    iString *encQuery = urlEncode_String(&query); /* fully encoded */
+    appendCStr_String(&encoded, "?"); /* re-add the separator that was skipped above */
+    append_String(&encoded, encQuery);    
+    delete_String(encQuery);
+    deinit_String(&query);
+    appendRange_String(&encoded, (iRangecc){ url.query.end, constEnd_String(d) }); /* whatever follows the query (presumably the fragment -- confirm against init_Url) */
+    set_String(d, &encoded); /* replace the caller's string with the rebuilt URL */
+    deinit_String(&encoded);
+}
+
 iBool isKnownScheme_Rangecc(iRangecc scheme) {
     if (isKnownUrlScheme_Rangecc(scheme)) {
         return iTrue;
@@ -667,20 +689,20 @@ const iString *canonicalUrl_String(const iString *d) {
     iString *canon = NULL;
     iUrl parts;
     init_Url(&parts, d);
-    /* Colons are in decoded form in the URL path. */
+    /* Colons (0x3a) are in decoded form in the URL path. */
     if (iStrStrN(parts.path.start, "%3A", size_Range(&parts.path)) ||
         iStrStrN(parts.path.start, "%3a", size_Range(&parts.path))) {
         /* This is done separately to avoid the copy if %3A is not present; it's rare. */
         canon = copy_String(d);
         urlDecodePath_String(canon);
-        iString *dec = maybeUrlDecodeExclude_String(canon, "%/?:;#&+= "); /* decode everything else in all parts */
+        iString *dec = maybeUrlDecodeExclude_String(canon, "% " URL_RESERVED_CHARS); /* decode everything else in all parts */
         if (dec) {
             set_String(canon, dec);
             delete_String(dec);
         }
     }
     else {
-        canon = maybeUrlDecodeExclude_String(d, "%/?:;#&+= ");
+        canon = maybeUrlDecodeExclude_String(d, "% " URL_RESERVED_CHARS);
     }
     /* `canon` may now be NULL if nothing was decoded. */
     if (indexOfCStr_String(canon ? canon : d, " ") != iInvalidPos ||
@@ -689,7 +711,7 @@ const iString *canonicalUrl_String(const iString *d) {
             canon = copy_String(d);
         }
         urlEncodeSpaces_String(canon);
-    }
+    }    
     return canon ? collect_String(canon) : d;
 }
 
diff --git a/src/gmutil.h b/src/gmutil.h
index 6d337eeb..15bb7b2e 100644
--- a/src/gmutil.h
+++ b/src/gmutil.h
@@ -100,6 +100,7 @@ iRegExp *       newGemtextLink_RegExp   (void);
 
 #define GEMINI_DEFAULT_PORT         ((uint16_t) 1965)
 #define GEMINI_DEFAULT_PORT_CSTR    "1965"
+#define URL_RESERVED_CHARS          ":/?#[]@!$&'()*+,;=" /* RFC 3986 */
 
 struct Impl_Url {
     iRangecc scheme;
@@ -131,6 +132,7 @@ const iString * urlFragmentStripped_String(const iString *);
 const iString * urlQueryStripped_String (const iString *);
 void            urlDecodePath_String    (iString *);
 void            urlEncodePath_String    (iString *);
+void            urlEncodeQuery_String   (iString *);
 iString *       makeFileUrl_String      (const iString *localFilePath);
 const char *    makeFileUrl_CStr        (const char *localFilePath);
 iString *       localFilePathFromUrl_String(const iString *);
diff --git a/src/ui/inputwidget.c b/src/ui/inputwidget.c
index b94e0c27..24983d69 100644
--- a/src/ui/inputwidget.c
+++ b/src/ui/inputwidget.c
@@ -1100,9 +1100,15 @@ static void updateBuffered_InputWidget_(iInputWidget *d) {
 void setText_InputWidget(iInputWidget *d, const iString *text) {
     if (!d) return;
     if (d->inFlags & isUrl_InputWidgetFlag) {
-        /* If user wants URLs encoded, also Punycode the domain. */
-        if (!prefs_App()->decodeUserVisibleURLs) {
+        if (prefs_App()->decodeUserVisibleURLs) {
             iString *enc = collect_String(copy_String(text));
+            urlDecodePath_String(enc);
+            text = enc;
+        }
+        else {
+            /* The user wants URLs encoded, also Punycode the domain. */
+            iString *enc = collect_String(copy_String(text));
+            urlEncodePath_String(enc);
             /* Prevent address bar spoofing (mentioned as IDN homograph attack in
                https://github.com/skyjake/lagrange/issues/73) */
             punyEncodeUrlHost_String(enc);
Proxy Information
Original URL
gemini://git.skyjake.fi/lagrange/work%2Fv1.10/cdiff/5e31f66313e2dc15fb9e75395504feb0992c3feb
Status Code
Success (20)
Meta
text/gemini; charset=utf-8
Capsule Response Time
55.674046 milliseconds
Gemini-to-HTML Time
0.362569 milliseconds

This content has been proxied by September (ba2dc).