aboutsummaryrefslogtreecommitdiff
path: root/src/detab.c
blob: e03fcf78335c5bb4bede7c7c4c2793d7c3f80fea (plain)
  1. #include "bstrlib.h"
  2. // UTF-8 aware detab: assumes s has no newlines, or only a final newline.
  3. // Return 0 on success, BSTR_ERR if invalid UTF-8.
  4. extern int bdetab(bstring s, int utf8)
  5. {
  6. unsigned char c;
  7. int pos = 0; // a count of characters
  8. int byte = 0; // a count of bytes
  9. int high_chars_to_skip = 0;
  10. int numspaces = 0;
  11. while ((c = bchar(s, byte))) {
  12. if (utf8 && high_chars_to_skip > 0) {
  13. if (c >= 0x80) {
  14. high_chars_to_skip--;
  15. byte++;
  16. } else {
  17. return BSTR_ERR; // invalid utf-8
  18. }
  19. } else if (c == '\t') {
  20. bdelete(s, byte, 1); // delete tab character
  21. numspaces = 4 - (pos % 4);
  22. binsertch(s, byte, numspaces, ' ');
  23. byte += numspaces;
  24. pos += numspaces;
  25. } else if (c <= 0x80 || !utf8) {
  26. byte++;
  27. pos++;
  28. } else { // multibyte utf8 sequences
  29. if (c >> 1 == 0176) {
  30. high_chars_to_skip = 5;
  31. } else if (c >> 2 == 076) {
  32. high_chars_to_skip = 4;
  33. } else if (c >> 3 == 036) {
  34. high_chars_to_skip = 3;
  35. } else if (c >> 4 == 016) {
  36. high_chars_to_skip = 2;
  37. } else if (c >> 5 == 06) {
  38. high_chars_to_skip = 1;
  39. } else {
  40. return BSTR_ERR; // invalid utf-8
  41. }
  42. pos++;
  43. byte++;
  44. }
  45. }
  46. return 0;
  47. }