diff options
Diffstat (limited to 'src/houdini_href_e.c')
-rw-r--r-- | src/houdini_href_e.c | 107 |
1 files changed, 0 insertions, 107 deletions
diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c deleted file mode 100644 index 7527780..0000000 --- a/src/houdini_href_e.c +++ /dev/null @@ -1,107 +0,0 @@ -#include <assert.h> -#include <stdio.h> -#include <string.h> - -#include "houdini.h" - -/* - * The following characters will not be escaped: - * - * -_.+!*'(),%#@?=;:/,+&$ alphanum - * - * Note that this character set is the addition of: - * - * - The characters which are safe to be in an URL - * - The characters which are *not* safe to be in - * an URL because they are RESERVED characters. - * - * We asume (lazily) that any RESERVED char that - * appears inside an URL is actually meant to - * have its native function (i.e. as an URL - * component/separator) and hence needs no escaping. - * - * There are two exceptions: the chacters & (amp) - * and ' (single quote) do not appear in the table. - * They are meant to appear in the URL as components, - * yet they require special HTML-entity escaping - * to generate valid HTML markup. - * - * All other characters will be escaped to %XX. - * - */ -static const char HREF_SAFE[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -int -houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size) -{ - static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; - uint8_t hex_str[3]; - - hex_str[0] = '%'; - - while (i < size) { - org = i; - while (i < size && HREF_SAFE[src[i]] != 0) - i++; - - if (likely(i > org)) - cmark_strbuf_put(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - switch (src[i]) { - /* amp appears all the time in URLs, but needs - * HTML-entity escaping to be inside an href */ - case '&': - cmark_strbuf_puts(ob, "&"); - break; - - /* the single quote is a valid URL character - * according to the standard; it needs HTML - * entity escaping too */ - case '\'': - cmark_strbuf_puts(ob, "'"); - break; - - /* the space can be escaped to %20 or a plus - * sign. we're going with the generic escape - * for now. the plus thing is more commonly seen - * when building GET strings */ -#if 0 - case ' ': - cmark_strbuf_putc(ob, '+'); - break; -#endif - - /* every other character goes with a %XX escaping */ - default: - hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; - hex_str[2] = hex_chars[src[i] & 0xF]; - cmark_strbuf_put(ob, hex_str, 3); - } - - i++; - } - - return 1; -} |