aboutsummaryrefslogtreecommitdiff
path: root/src/html/houdini_href_e.c
blob: 12456cec68790507625716ce1b40958fba430190 (plain)
  1. #include <assert.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include "html/houdini.h"
  5. /*
  6. * The following characters will not be escaped:
  7. *
  8. * -_.+!*(),%#@?=;:/$ alphanum
  9. *
  10. * Note that this character set is the addition of:
  11. *
  12. * - The characters which are safe to be in a URL
  13. * - The characters which are *not* safe to be in
  14. * a URL because they are RESERVED characters.
  15. *
  16. * We assume (lazily) that any RESERVED char that
  17. * appears inside a URL is actually meant to
  18. * have its native function (i.e. as a URL
  19. * component/separator) and hence needs no escaping.
  20. *
  21. * There are two exceptions: the characters & (amp)
  22. * and ' (single quote) do not appear in the table.
  23. * They are meant to appear in the URL as components,
  24. * yet they require special HTML-entity escaping
  25. * to generate valid HTML markup.
  26. *
  27. * All other characters will be escaped to %XX.
  28. *
  29. */
  30. static const char HREF_SAFE[] = {
  31. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  32. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  33. 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
  34. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
  35. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  37. 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  39. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  40. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  41. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  42. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  43. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  44. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  45. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  46. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  47. };
  48. int
  49. houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
  50. {
  51. static const uint8_t hex_chars[] = "0123456789ABCDEF";
  52. size_t i = 0, org;
  53. uint8_t hex_str[3];
  54. hex_str[0] = '%';
  55. while (i < size) {
  56. org = i;
  57. while (i < size && HREF_SAFE[src[i]] != 0)
  58. i++;
  59. if (likely(i > org))
  60. strbuf_put(ob, src + org, i - org);
  61. /* escaping */
  62. if (i >= size)
  63. break;
  64. switch (src[i]) {
  65. /* amp appears all the time in URLs, but needs
  66. * HTML-entity escaping to be inside an href */
  67. case '&':
  68. strbuf_puts(ob, "&amp;");
  69. break;
  70. /* the single quote is a valid URL character
  71. * according to the standard; it needs HTML
  72. * entity escaping too */
  73. case '\'':
  74. strbuf_puts(ob, "&#x27;");
  75. break;
  76. /* the space can be escaped to %20 or a plus
  77. * sign. we're going with the generic escape
  78. * for now. the plus thing is more commonly seen
  79. * when building GET strings */
  80. #if 0
  81. case ' ':
  82. strbuf_putc(ob, '+');
  83. break;
  84. #endif
  85. /* every other character goes with a %XX escaping */
  86. default:
  87. hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
  88. hex_str[2] = hex_chars[src[i] & 0xF];
  89. strbuf_put(ob, hex_str, 3);
  90. }
  91. i++;
  92. }
  93. return 1;
  94. }