]>
code.delx.au - pulseaudio/blob - src/polypcore/utf8.c
3 /* This file is based on the GLIB utf8 validation functions. The
4 * original license text follows. */
6 /* gutf8.c - Operations on UTF-8 strings.
8 * Copyright (C) 1999 Tom Tromey
9 * Copyright (C) 2000 Red Hat, Inc.
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
36 #include <polyp/xmalloc.h>
40 #define FILTER_CHAR '_'
42 static inline int is_unicode_valid(uint32_t ch
) {
43 if (ch
>= 0x110000) /* End of unicode space */
45 if ((ch
& 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
47 if ((ch
>= 0xFDD0) && (ch
<= 0xFDEF)) /* Reserved */
49 if ((ch
& 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
54 static inline int is_continuation_char(uint8_t ch
) {
55 if ((ch
& 0xc0) != 0x80) /* 10xxxxxx */
60 static inline void merge_continuation_char(uint32_t *u_ch
, uint8_t ch
) {
65 static char* utf8_validate(const char *str
, char *output
) {
68 const uint8_t *p
, *last
;
72 o
= (uint8_t*) output
;
73 for (p
= (const uint8_t*) str
; *p
; p
++) {
80 if ((*p
& 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
85 } else if ((*p
& 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
90 } else if ((*p
& 0xf8) == 0xf0) { /* 11110xxx four-char seq */
100 if (!is_continuation_char(*p
))
102 merge_continuation_char(&val
, *p
);
106 if (!is_continuation_char(*p
))
108 merge_continuation_char(&val
, *p
);
112 if (!is_continuation_char(*p
))
114 merge_continuation_char(&val
, *p
);
119 if (!is_unicode_valid(val
))
123 memcpy(o
, last
, size
);
135 p
= last
; /* We retry at the next character */
155 const char* pa_utf8_valid (const char *str
) {
156 return utf8_validate(str
, NULL
);
159 char* pa_utf8_filter (const char *str
) {
162 new_str
= pa_xnew(char, strlen(str
) + 1);
164 return utf8_validate(str
, new_str
);