diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-12-29 22:15:09 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-12-29 22:16:11 -0800 |
commit | 86fda06897ccd4d610410f920923c6e1f3e2bf3d (patch) | |
tree | 980d5b11b914223de03e1688503d40f9b4acbbec | |
parent | d943eed9db668bb3399264d5c978e20882bc6098 (diff) |
Added cmark_ctype.h with locale-independent isspace, ispunct, etc.
Otherwise cmark's behavior varies unpredictably with the locale.
`is_punctuation` in utf8.h has also been adjusted so that all ASCII
symbol characters count as punctuation, even though some are not in
P* character classes.
-rw-r--r-- | src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/blocks.c | 2 | ||||
-rw-r--r-- | src/buffer.c | 2 | ||||
-rw-r--r-- | src/chunk.h | 2 | ||||
-rw-r--r-- | src/cmark_ctype.c | 33 | ||||
-rw-r--r-- | src/cmark_ctype.h | 11 | ||||
-rw-r--r-- | src/inlines.c | 2 | ||||
-rw-r--r-- | src/utf8.c | 13 |
8 files changed, 52 insertions, 15 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4272234..87651bc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,6 +15,7 @@ set(HEADERS inlines.h html_unescape.h houdini.h + cmark_ctype.h ) set(LIBRARY_SOURCES cmark.c @@ -34,6 +35,7 @@ set(LIBRARY_SOURCES houdini_href_e.c houdini_html_e.c houdini_html_u.c + cmark_ctype.c ${HEADERS} ) diff --git a/src/blocks.c b/src/blocks.c index 319706d..b3ea362 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1,8 +1,8 @@ #include <stdlib.h> #include <assert.h> #include <stdio.h> -#include <ctype.h> +#include "cmark_ctype.h" #include "config.h" #include "parser.h" #include "cmark.h" diff --git a/src/buffer.c b/src/buffer.c index 73a9728..40e8674 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1,11 +1,11 @@ #include <stdarg.h> -#include <ctype.h> #include <string.h> #include <assert.h> #include <string.h> #include <stdio.h> #include <stdlib.h> +#include "cmark_ctype.h" #include "buffer.h" /* Used as default value for cmark_strbuf->ptr so that people can always diff --git a/src/chunk.h b/src/chunk.h index 0f48791..ba6c89e 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -2,9 +2,9 @@ #define CMARK_CHUNK_H #include <string.h> -#include <ctype.h> #include <stdlib.h> #include <assert.h> +#include "cmark_ctype.h" #include "buffer.h" typedef struct { diff --git a/src/cmark_ctype.c b/src/cmark_ctype.c new file mode 100644 index 0000000..9ed4b5c --- /dev/null +++ b/src/cmark_ctype.c @@ -0,0 +1,33 @@ +/** + * Returns 1 if c is a "whitespace" character as defined by the spec. + */ +int isspace(char c) +{ + return (c == 0x09 || + c == 0x20 || + c == 0x0a || + c == 0x0d); +} + +/** + * Returns 1 if c is an ascii punctuation character. 
+ */ +int ispunct(char c) +{ + return ((c >= 33 && c <= 47) || + (c >= 58 && c <= 64) || + (c >= 91 && c <= 96) || + (c >= 123 && c <= 126)); +} + +int isalnum(char c) +{ + return ((c >= 48 && c <= 57) || + (c >= 65 && c <= 90) || + (c >= 97 && c <= 122)); +} + +int isdigit(char c) +{ + return (c >= 48 && c <= 57); +} diff --git a/src/cmark_ctype.h b/src/cmark_ctype.h new file mode 100644 index 0000000..afc605e --- /dev/null +++ b/src/cmark_ctype.h @@ -0,0 +1,11 @@ +/** Locale-independent versions of functions from ctype.h. + * We want cmark to behave the same no matter what the system locale. + */ + +int isspace(char c); + +int ispunct(char c); + +int isalnum(char c); + +int isdigit(char c); diff --git a/src/inlines.c b/src/inlines.c index 8235f59..9d2d7f8 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1,8 +1,8 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> -#include <ctype.h> +#include "cmark_ctype.h" #include "config.h" #include "node.h" #include "parser.h" @@ -2,6 +2,7 @@ #include <stdint.h> #include <assert.h> +#include "cmark_ctype.h" #include "utf8.h" static const int8_t utf8proc_utf8class[256] = { @@ -268,17 +269,7 @@ int utf8proc_is_space(int32_t uc) // matches anything in the P[cdefios] classes. int utf8proc_is_punctuation(int32_t uc) { - return ((uc >= 33 && uc <= 35) || - (uc >= 37 && uc <= 42) || - (uc >= 44 && uc <= 47) || - uc == 58 || - uc == 59 || - uc == 63 || - uc == 64 || - (uc >= 91 && uc <= 93) || - uc == 95 || - uc == 123 || - uc == 125 || + return ((uc < 128 && ispunct((char)uc)) || uc == 161 || uc == 167 || uc == 171 || |