aboutsummaryrefslogtreecommitdiff
path: root/src/html/houdini_href_e.c
blob: 59fe8507718c627b9e029037133344c00a52a11c (plain)
  1. #include <assert.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include "html/houdini.h"
  5. /*
  6. * The following characters will not be escaped:
  7. *
  8. * -_.+!*'(),%#@?=;:/,+&$ alphanum
  9. *
  10. * Note that this character set is the addition of:
  11. *
  12. * - The characters which are safe to be in an URL
  13. * - The characters which are *not* safe to be in
  14. * an URL because they are RESERVED characters.
  15. *
  16. * We assume (lazily) that any RESERVED char that
  17. * appears inside an URL is actually meant to
  18. * have its native function (i.e. as an URL
  19. * component/separator) and hence needs no escaping.
  20. *
  21. * There are two exceptions: the characters & (amp)
  22. * and ' (single quote) do not appear in the table.
  23. * They are meant to appear in the URL as components,
  24. * yet they require special HTML-entity escaping
  25. * to generate valid HTML markup.
  26. *
  27. * All other characters will be escaped to %XX.
  28. *
  29. */
  30. static const char HREF_SAFE[] = {
  31. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  32. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  33. 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
  34. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
  35. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  37. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  39. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  40. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  41. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  42. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  43. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  44. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  45. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  46. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  47. };
  48. int
  49. houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
  50. {
  51. static const uint8_t hex_chars[] = "0123456789ABCDEF";
  52. size_t i = 0, org;
  53. uint8_t hex_str[3];
  54. hex_str[0] = '%';
  55. while (i < size) {
  56. org = i;
  57. while (i < size && HREF_SAFE[src[i]] != 0)
  58. i++;
  59. if (likely(i > org)) {
  60. if (unlikely(org == 0)) {
  61. if (i >= size)
  62. return 0;
  63. gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
  64. }
  65. gh_buf_put(ob, src + org, i - org);
  66. }
  67. /* escaping */
  68. if (i >= size)
  69. break;
  70. switch (src[i]) {
  71. /* amp appears all the time in URLs, but needs
  72. * HTML-entity escaping to be inside an href */
  73. case '&':
  74. gh_buf_puts(ob, "&amp;");
  75. break;
  76. /* the single quote is a valid URL character
  77. * according to the standard; it needs HTML
  78. * entity escaping too */
  79. case '\'':
  80. gh_buf_puts(ob, "&#x27;");
  81. break;
  82. /* the space can be escaped to %20 or a plus
  83. * sign. we're going with the generic escape
  84. * for now. the plus thing is more commonly seen
  85. * when building GET strings */
  86. #if 0
  87. case ' ':
  88. gh_buf_putc(ob, '+');
  89. break;
  90. #endif
  91. /* every other character goes with a %XX escaping */
  92. default:
  93. hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
  94. hex_str[2] = hex_chars[src[i] & 0xF];
  95. gh_buf_put(ob, hex_str, 3);
  96. }
  97. i++;
  98. }
  99. return 1;
  100. }