diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-01-24 21:35:03 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-01-24 21:39:07 -0800 |
commit | 829b089c80895d9a78938c5bc7747aea1cd48eb6 (patch) | |
tree | 53bd534741a90c547c5d87039efa5ee625da8081 /src/houdini_html_u.c | |
parent | 5ef31853d5161d4b5a2dfc0df94e6eaaeb3215d0 (diff) |
Removed implementation-specific material from repository.
The C and JS implementations are being split off into
different repositories.
This repository will just have the spec itself.
Diffstat (limited to 'src/houdini_html_u.c')
-rw-r--r-- | src/houdini_html_u.c | 113 |
1 files changed, 0 insertions, 113 deletions
```diff
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
deleted file mode 100644
index ecd7faa..0000000
--- a/src/houdini_html_u.c
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "buffer.h"
-#include "houdini.h"
-#include "utf8.h"
-#include "html_unescape.h"
-
-size_t
-houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
-{
-	size_t i = 0;
-
-	if (size > 3 && src[0] == '#') {
-		int codepoint = 0;
-
-		if (_isdigit(src[1])) {
-			for (i = 1; i < size && _isdigit(src[i]); ++i) {
-				int cp = (codepoint * 10) + (src[i] - '0');
-
-				if (cp < codepoint)
-					return 0;
-
-				codepoint = cp;
-			}
-		}
-
-		else if (src[1] == 'x' || src[1] == 'X') {
-			for (i = 2; i < size && _isxdigit(src[i]); ++i) {
-				int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
-
-				if (cp < codepoint)
-					return 0;
-
-				codepoint = cp;
-			}
-		}
-
-		if (i < size && src[i] == ';' && codepoint) {
-			utf8proc_encode_char(codepoint, ob);
-			return i + 1;
-		}
-	}
-
-	else {
-		if (size > MAX_WORD_LENGTH)
-			size = MAX_WORD_LENGTH;
-
-		for (i = MIN_WORD_LENGTH; i < size; ++i) {
-			if (src[i] == ' ')
-				break;
-
-			if (src[i] == ';') {
-				const struct html_ent *entity = find_entity((char *)src, i);
-
-				if (entity != NULL) {
-					size_t len = strnlen((const char *)entity->utf8, 4);
-					cmark_strbuf_put(ob, entity->utf8, len);
-					return i + 1;
-				}
-
-				break;
-			}
-		}
-	}
-
-	return 0;
-}
-
-int
-houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size)
-{
-	size_t i = 0, org, ent;
-
-	while (i < size) {
-		org = i;
-		while (i < size && src[i] != '&')
-			i++;
-
-		if (likely(i > org)) {
-			if (unlikely(org == 0)) {
-				if (i >= size)
-					return 0;
-
-				cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
-			}
-
-			cmark_strbuf_put(ob, src + org, i - org);
-		}
-
-		/* escaping */
-		if (i >= size)
-			break;
-
-		i++;
-
-		ent = houdini_unescape_ent(ob, src + i, size - i);
-		i += ent;
-
-		/* not really an entity */
-		if (ent == 0)
-			cmark_strbuf_putc(ob, '&');
-	}
-
-	return 1;
-}
-
-void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size)
-{
-	if (!houdini_unescape_html(ob, src, size))
-		cmark_strbuf_put(ob, src, size);
-}
```