]>
code.delx.au - pulseaudio/blob - src/polyp/utf8.c
3 /* This file is based on the GLIB utf8 validation functions. The
4 * original license text follows. */
6 /* gutf8.c - Operations on UTF-8 strings.
8 * Copyright (C) 1999 Tom Tromey
9 * Copyright (C) 2000 Red Hat, Inc.
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
39 #define FILTER_CHAR '_'
41 static inline int is_unicode_valid(uint32_t ch
) {
42 if (ch
>= 0x110000) /* End of unicode space */
44 if ((ch
& 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
46 if ((ch
>= 0xFDD0) && (ch
<= 0xFDEF)) /* Reserved */
48 if ((ch
& 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
53 static inline int is_continuation_char(uint8_t ch
) {
54 if ((ch
& 0xc0) != 0x80) /* 10xxxxxx */
59 static inline void merge_continuation_char(uint32_t *u_ch
, uint8_t ch
) {
64 static char* utf8_validate(const char *str
, char *output
) {
67 const uint8_t *p
, *last
;
71 o
= (uint8_t*) output
;
72 for (p
= (const uint8_t*) str
; *p
; p
++) {
79 if ((*p
& 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
84 } else if ((*p
& 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
89 } else if ((*p
& 0xf8) == 0xf0) { /* 11110xxx four-char seq */
99 if (!is_continuation_char(*p
))
101 merge_continuation_char(&val
, *p
);
105 if (!is_continuation_char(*p
))
107 merge_continuation_char(&val
, *p
);
111 if (!is_continuation_char(*p
))
113 merge_continuation_char(&val
, *p
);
118 if (!is_unicode_valid(val
))
122 memcpy(o
, last
, size
);
134 p
= last
; /* We retry at the next character */
154 const char* pa_utf8_valid (const char *str
) {
155 return utf8_validate(str
, NULL
);
158 char* pa_utf8_filter (const char *str
) {
161 new_str
= pa_xnew(char, strlen(str
) + 1);
163 return utf8_validate(str
, new_str
);