blob: e03fcf78335c5bb4bede7c7c4c2793d7c3f80fea (
plain)
- #include "bstrlib.h"
- // UTF-8 aware detab: assumes s has no newlines, or only a final newline.
- // Return 0 on success, BSTR_ERR if invalid UTF-8.
- extern int bdetab(bstring s, int utf8)
- {
- unsigned char c;
- int pos = 0; // a count of characters
- int byte = 0; // a count of bytes
- int high_chars_to_skip = 0;
- int numspaces = 0;
- while ((c = bchar(s, byte))) {
- if (utf8 && high_chars_to_skip > 0) {
- if (c >= 0x80) {
- high_chars_to_skip--;
- byte++;
- } else {
- return BSTR_ERR; // invalid utf-8
- }
- } else if (c == '\t') {
- bdelete(s, byte, 1); // delete tab character
- numspaces = 4 - (pos % 4);
- binsertch(s, byte, numspaces, ' ');
- byte += numspaces;
- pos += numspaces;
- } else if (c <= 0x80 || !utf8) {
- byte++;
- pos++;
- } else { // multibyte utf8 sequences
- if (c >> 1 == 0176) {
- high_chars_to_skip = 5;
- } else if (c >> 2 == 076) {
- high_chars_to_skip = 4;
- } else if (c >> 3 == 036) {
- high_chars_to_skip = 3;
- } else if (c >> 4 == 016) {
- high_chars_to_skip = 2;
- } else if (c >> 5 == 06) {
- high_chars_to_skip = 1;
- } else {
- return BSTR_ERR; // invalid utf-8
- }
- pos++;
- byte++;
- }
- }
- return 0;
- }
|