commit: 775c33f397eeaf0277c3d6ba3bafaa8aec60b4aa
parent faadcbaa492ca5d73d7a196affad47ab5cd9b46a
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Mon, 24 Jun 2024 18:00:10 +0200
uri: Add URI decoding
Diffstat:
M | .gitignore | 1 | + |
M | Makefile | 16 | +++++++++++++--- |
M | httpc.c | 2 | ++ |
A | httpc.h | 13 | +++++++++++++ |
A | uri.c | 205 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | uri_test.c | 139 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 373 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
/config.mk
/httpc
+/*_test
diff --git a/Makefile b/Makefile
@@ -1,9 +1,19 @@
# SPDX-License-Identifier: MPL-2.0
include config.mk
-httpc: httpc.c
- ${CC} -std=c99 ${CFLAGS} ${DEPS_cflags} -o $@ $< ${LDFLAGS} ${DEPS_libs}
+# Default target: build the httpc binary.
+.PHONY: all
+all: httpc
+
+# httpc is compiled from httpc.c and uri.c in a single compiler invocation;
+# httpc.h is listed as a prerequisite so that header changes trigger a rebuild.
+httpc: httpc.c httpc.h uri.c
+ ${CC} -std=c99 ${CFLAGS} ${DEPS_cflags} -o $@ httpc.c uri.c ${LDFLAGS} ${DEPS_libs}
+
+# Test binary for uri.c; built without DEPS_cflags/DEPS_libs.
+uri_test: httpc.h uri.c uri_test.c
+ ${CC} -std=c99 ${CFLAGS} -o $@ uri_test.c uri.c ${LDFLAGS}
+
+# Build and run the URI test binary.
+.PHONY: check
+check: uri_test
+ ./uri_test
.PHONY: clean
clean:
- rm httpc
+ rm -f httpc uri_test
diff --git a/httpc.c b/httpc.c
@@ -12,6 +12,8 @@
#include <stdbool.h>
#include <fcntl.h> // open, O_*
+#include "httpc.h"
+
int body = 0;
bool verbose = false;
diff --git a/httpc.h b/httpc.h
@@ -0,0 +1,13 @@
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+httpc@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#ifndef HTTPC_H
+#define HTTPC_H
+
+/*
+ * Components of a URI as split by decode_uri().
+ *
+ * Ownership, as implemented in uri.c:
+ *  - scheme points at the start of a heap copy of the input string;
+ *  - path points into that same copy and must not be freed on its own;
+ *  - host and port are separate heap allocations (port is NULL when absent);
+ *  - fragment is not filled in by decode_uri() yet.
+ */
+struct URI {
+	char *scheme;
+	char *host;
+	char *port;
+	char *path;
+	char *fragment;
+};
+
+/* Frees the memory referenced by a URI obtained from decode_uri(); NULL is accepted. */
+void decode_uri_finish(struct URI *uri);
+
+/*
+ * Splits arg into its components.
+ * Returns NULL and stores a human-readable message in *err on failure.
+ * On success the caller must call decode_uri_finish() on the result.
+ */
+struct URI *decode_uri(const char *arg, char **err);
+
+#endif /* HTTPC_H */
diff --git a/uri.c b/uri.c
@@ -0,0 +1,205 @@
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+httpc@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#define _POSIX_C_SOURCE 200809L
+
+#include "httpc.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <ctype.h> // isalnum, iscntrl
+#include <string.h> // strlen, strerror, strdup
+#include <stdlib.h> // free
+
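+// Character classes below follow the URI grammar of RFC 1945, section 3.2
+// (https://www.rfc-editor.org/rfc/rfc1945#section-3.2)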
+
+/*
+ * RFC 1945 "reserved" class: ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+".
+ * Returns 1 when c is one of these characters, 0 otherwise.
+ */
+static inline int
+is_reserved(int c)
+{
+	// Logical OR throughout: the last two operators used to be bitwise '|'.
+	return c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || c == '&' || c == '=' || c == '+';
+}
+
+/*
+ * RFC 1945 "extra" class: "!" | "*" | "'" | "(" | ")" | ",".
+ * Returns 1 for a member of the class, 0 otherwise.
+ */
+static inline int
+is_extra(int c)
+{
+	switch(c)
+	{
+	case '!':
+	case '*':
+	case '\'':
+	case '(':
+	case ')':
+	case ',':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * RFC 1945 "safe" class: "$" | "-" | "_" | ".".
+ * Returns 1 for a member of the class, 0 otherwise.
+ */
+static inline int
+is_safe(int c)
+{
+	static const int members[] = { '$', '-', '_', '.' };
+
+	for(unsigned k = 0; k < sizeof(members) / sizeof(members[0]); k++)
+	{
+		if(members[k] == c) return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * RFC 1945 "unsafe" class: CTL | SP | <"> | "#" | "%" | "<" | ">".
+ * Returns 1 for a member of the class, 0 otherwise.
+ *
+ * c is handed to iscntrl(), so it must be representable as unsigned char
+ * (or be EOF).
+ */
+static inline int
+is_unsafe(int c)
+{
+	// The double quote is part of the RFC's list and was previously missing.
+	return iscntrl(c) || c == ' ' || c == '"' || c == '#' || c == '%' || c == '<' || c == '>';
+}
+
+/*
+ * RFC 1945 "national" class: anything that is not alphanumeric, reserved,
+ * extra, safe or unsafe.
+ * Returns 1 for a member of the class, 0 otherwise.
+ */
+static int
+is_national(int c)
+{
+	if(isalnum(c)) return 0;
+	if(is_reserved(c)) return 0;
+	if(is_extra(c)) return 0;
+	if(is_safe(c)) return 0;
+	if(is_unsafe(c)) return 0;
+
+	return 1;
+}
+
+/*
+ * RFC 1945 "unreserved" class: ALPHA | DIGIT | safe | extra | national.
+ * Returns 1 for a member of the class, 0 otherwise.
+ */
+static int
+is_unreserved(int c)
+{
+	if(isalnum(c) || is_safe(c)) return 1;
+
+	return (is_extra(c) || is_national(c)) ? 1 : 0;
+}
+
+/*
+ * Releases the memory referenced by a struct URI filled in by decode_uri().
+ *
+ * scheme is the start of the heap copy of the input string, and path points
+ * into that same copy, so only scheme, host and port are passed to free().
+ * Every field is reset to NULL afterwards: decode_uri() hands out a static
+ * struct, and pointers left behind here would otherwise be seen (and freed
+ * again) after a later decode of a URI that lacks e.g. a port.
+ *
+ * Passing NULL is a no-op.
+ */
+void
+decode_uri_finish(struct URI *uri)
+{
+	if(uri == NULL) return;
+
+	free(uri->scheme);
+	free(uri->host);
+	free(uri->port); // free(NULL) is a no-op, no guard needed
+
+	uri->scheme   = NULL;
+	uri->host     = NULL;
+	uri->port     = NULL;
+	uri->path     = NULL;
+	uri->fragment = NULL;
+}
+
+/*
+ * Splits `ori`, expected to look like scheme://host[:port][path], into its
+ * components. The host may be a bracketed IPv6 literal, in which case the
+ * brackets are stripped.
+ *
+ * Returns a pointer to a function-static struct URI on success, so a second
+ * call overwrites the result of the first. The caller must call
+ * decode_uri_finish() on the result. On failure NULL is returned, nothing
+ * stays allocated, and *err points to a message that must not be freed (it
+ * may live in a static buffer). `err` may be NULL if the message is not wanted.
+ *
+ * Ownership on success: scheme is the start of a heap copy of `ori`, path
+ * points into that copy, host and port are separate allocations, fragment is
+ * left NULL.
+ */
+struct URI *
+decode_uri(const char *ori, char **err)
+{
+	static struct URI uri;
+	const struct URI blank = {
+		.scheme = NULL,
+		.host = NULL,
+		.port = NULL,
+		.path = NULL,
+		.fragment = NULL
+	};
+	char *discarded_err = NULL;
+
+	if(err == NULL) err = &discarded_err;
+
+	// Forget whatever an earlier call left in the static struct, so that a
+	// URI without a port never exposes a stale (possibly freed) pointer.
+	uri = blank;
+
+	if(ori == NULL)
+	{
+		*err = "Null pointer passed";
+		return NULL;
+	}
+
+	errno = 0;
+	char *arg = strdup(ori);
+	if(arg == NULL)
+	{
+		*err = strerror(errno);
+		return NULL;
+	}
+
+	size_t i = 0;
+	size_t len = strlen(arg);
+
+	// scheme
+	for(; i < len; i++)
+	{
+		char c = arg[i];
+
+		// <ctype.h> functions need an unsigned char value
+		if(isalnum((unsigned char)c) || c == '+' || c == '-' || c == '.') continue;
+
+		if(c != ':')
+		{
+			// trailing '_' is a placeholder for the offending character
+			static char error[] = "Invalid char in scheme: _";
+			error[sizeof(error)-2] = c;
+
+			*err = error;
+			goto fail;
+		}
+
+		arg[i++] = '\0';
+		uri.scheme = arg;
+		break;
+	}
+
+	// When no ':' was found, i == len and arg[i] is the terminating NUL,
+	// so the second comparison is never evaluated out of bounds.
+	if(arg[i] != '/' || arg[i+1] != '/')
+	{
+		*err = "Missing :// after scheme";
+		goto fail;
+	}
+	i += 2;
+
+	if(arg[i] == '[')
+	{
+		// IPv6 address
+		char *addr = arg+i+1;
+
+		for(; i < len && arg[i] != ']'; i++);
+
+		if(arg[i] != ']')
+		{
+			*err = "Unmatched [ in host";
+			goto fail;
+		}
+
+		arg[i] = '\0';
+		i++;
+
+		uri.host = strdup(addr);
+		if(uri.host == NULL)
+		{
+			*err = "host = strdup(...) failed";
+			goto fail;
+		}
+	}
+	else
+	{
+		// hostname or IPv4 address
+		char *addr = arg+i;
+		size_t addr_len = 0;
+
+		for(; i < len; i++, addr_len++)
+		{
+			char c = arg[i];
+
+			if(c == '/' || c == ':') break;
+		}
+
+		uri.host = strndup(addr, addr_len);
+		if(uri.host == NULL)
+		{
+			*err = "host = strdup(...) failed";
+			goto fail;
+		}
+	}
+
+	// port (optional)
+	if(arg[i] == ':')
+	{
+		arg[i++] = '\0';
+
+		char *port = arg+i;
+		size_t port_len = 0;
+
+		for(; i < len; i++, port_len++)
+		{
+			char c = arg[i];
+
+			if(c == '/') break;
+
+			if(!isdigit((unsigned char)c))
+			{
+				static char error[] = "Invalid char in port: _";
+				error[sizeof(error)-2] = c;
+
+				*err = error;
+				goto fail;
+			}
+		}
+
+		uri.port = strndup(port, port_len);
+		if(uri.port == NULL)
+		{
+			*err = "port = strdup(...) failed";
+			goto fail;
+		}
+	}
+
+	// FIXME: Check for invalid characters
+	uri.path = arg+i;
+
+	return &uri;
+
+fail:
+	// uri.scheme, when set, aliases arg: free the buffer exactly once and
+	// also release the host/port copies made before the error.
+	free(uri.host);
+	free(uri.port);
+	free(arg);
+	uri = blank;
+	return NULL;
+}
diff --git a/uri_test.c b/uri_test.c
@@ -0,0 +1,139 @@
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+httpc@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#define _POSIX_C_SOURCE 200809L
+
+#include "httpc.h" // decode_uri
+#include <stdio.h> // printf
+#include <string.h> // strcmp
+#include <stdbool.h>
+
+int counter = 0;
+
+// diag(x): evaluates x and, when it is false, prints the expression text as a
+// "# ..." diagnostic line. Yields the value of x converted to int.
+#define diag(x) (diag_f((int)(x), #x))
+static int
+diag_f(int status, const char *expression)
+{
+	if(status == 0)
+	{
+		printf("# Expression failed: %s\n", expression);
+	}
+
+	return status;
+}
+
+// str_match(got, expt): compares two strings, either of which may be NULL.
+// Yields 0 when both are NULL or both are equal strings; otherwise prints a
+// "# ..." diagnostic naming both expressions and yields 1.
+#define str_match(got, expt) (str_match_f(got, #got, expt, #expt))
+static int
+str_match_f(const char *got, const char *got_name, const char *expt, const char *expt_name)
+{
+	if(got == NULL && expt == NULL) return 0;
+
+	if(got != NULL && expt != NULL && strcmp(got, expt) == 0) return 0;
+
+	// Either the strings differ or exactly one side is NULL; both count as a
+	// mismatch. A NULL side is printed as an unquoted NULL instead of being
+	// passed to %s.
+	printf("# %s (%s%s%s) != %s (%s%s%s)\n",
+	       got_name,
+	       got != NULL ? "\"" : "", got != NULL ? got : "NULL", got != NULL ? "\"" : "",
+	       expt_name,
+	       expt != NULL ? "\"" : "", expt != NULL ? expt : "NULL", expt != NULL ? "\"" : "");
+	return 1;
+}
+
+// Runs one test case: decodes arg, compares the reported error with `error`
+// and the decoded fields with `exp` (NULL meaning "decoding must fail"), then
+// prints an "ok"/"not ok" line numbered from the global counter.
+// Returns true when the case failed, false when it passed.
+static bool
+t(char *arg, struct URI *exp, const char *error)
+{
+	const int test_no = ++counter;
+	char *res_error = NULL;
+	struct URI *res = decode_uri(arg, &res_error);
+
+	// Names handed to str_match()/diag() are stringified into the
+	// diagnostics, so they stay as they are.
+	int failures = str_match(res_error, error);
+
+	if(exp != NULL)
+	{
+		if(!diag(res != NULL))
+		{
+			failures++;
+		}
+		else
+		{
+			failures += str_match(exp->scheme, res->scheme);
+			failures += str_match(exp->host, res->host);
+			failures += str_match(exp->port, res->port);
+			failures += str_match(exp->path, res->path);
+			failures += str_match(exp->fragment, res->fragment);
+		}
+	}
+	else if(!diag(res == NULL))
+	{
+		failures++;
+	}
+
+	decode_uri_finish(res);
+
+	if(failures == 0)
+	{
+		printf("ok %d - %s\n", test_no, arg);
+		return 0;
+	}
+
+	printf("not ok %d - %s\n", test_no, arg);
+	return 1;
+}
+
+// Test driver: prints the plan line, runs every case in the table through
+// t(), checks that the number of executed cases matches the plan and returns
+// the number of failures as exit status.
+int
+main()
+{
+	int plan = 4;
+	printf("1..%d\n", plan);
+
+	struct {
+		char *arg;
+		struct URI exp;
+		const char *error;
+	} cases[] = {
+		{
+			"https://example.org/",
+			{ .scheme = "https", .host = "example.org", .port = NULL, .path = "/", .fragment = NULL },
+			NULL
+		},
+		{
+			"https://example.org",
+			{ .scheme = "https", .host = "example.org", .port = NULL, .path = "", .fragment = NULL },
+			NULL
+		},
+		{
+			"https://example.org:1337/",
+			{ .scheme = "https", .host = "example.org", .port = "1337", .path = "/", .fragment = NULL },
+			NULL
+		},
+		{
+			"https://[fe80::feed:face]:1337/",
+			{ .scheme = "https", .host = "fe80::feed:face", .port = "1337", .path = "/", .fragment = NULL },
+			NULL
+		},
+	};
+
+	int err = 0;
+
+	for(size_t n = 0; n < sizeof(cases) / sizeof(cases[0]); n++)
+	{
+		err += t(cases[n].arg, &cases[n].exp, cases[n].error);
+	}
+
+	if(!diag(plan == counter)) err++;
+
+	return err;
+}