]>
code.delx.au - pulseaudio/blob - src/polyp/utf8.c
33fa7214e0aedb409a61b9c6a3fac0ff75466062
3 /* This file is based on the GLIB utf8 validation functions. The
4 * original license text follows. */
6 /* gutf8.c - Operations on UTF-8 strings.
8 * Copyright (C) 1999 Tom Tromey
9 * Copyright (C) 2000 Red Hat, Inc.
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
44 #define FILTER_CHAR '_'
46 static inline int is_unicode_valid(uint32_t ch
) {
47 if (ch
>= 0x110000) /* End of unicode space */
49 if ((ch
& 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
51 if ((ch
>= 0xFDD0) && (ch
<= 0xFDEF)) /* Reserved */
53 if ((ch
& 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
58 static inline int is_continuation_char(uint8_t ch
) {
59 if ((ch
& 0xc0) != 0x80) /* 10xxxxxx */
64 static inline void merge_continuation_char(uint32_t *u_ch
, uint8_t ch
) {
69 static char* utf8_validate(const char *str
, char *output
) {
72 const uint8_t *p
, *last
;
76 o
= (uint8_t*) output
;
77 for (p
= (const uint8_t*) str
; *p
; p
++) {
84 if ((*p
& 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
89 } else if ((*p
& 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
94 } else if ((*p
& 0xf8) == 0xf0) { /* 11110xxx four-char seq */
104 if (!is_continuation_char(*p
))
106 merge_continuation_char(&val
, *p
);
110 if (!is_continuation_char(*p
))
112 merge_continuation_char(&val
, *p
);
116 if (!is_continuation_char(*p
))
118 merge_continuation_char(&val
, *p
);
123 if (!is_unicode_valid(val
))
127 memcpy(o
, last
, size
);
139 p
= last
; /* We retry at the next character */
159 const char* pa_utf8_valid (const char *str
) {
160 return utf8_validate(str
, NULL
);
163 char* pa_utf8_filter (const char *str
) {
166 new_str
= pa_xnew(char, strlen(str
) + 1);
168 return utf8_validate(str
, new_str
);
173 static char* iconv_simple(const char *str
, const char *to
, const char *from
) {
178 ICONV_CONST
char *inbuf
;
180 size_t res
, inbytes
, outbytes
;
182 cd
= iconv_open(to
, from
);
183 if (cd
== (iconv_t
)-1)
186 inlen
= len
= strlen(str
) + 1;
187 new_str
= pa_xmalloc(len
);
191 inbuf
= (ICONV_CONST
char*)str
; /* Brain dead prototype for iconv() */
196 res
= iconv(cd
, &inbuf
, &inbytes
, &outbuf
, &outbytes
);
198 if (res
!= (size_t)-1)
201 if (errno
!= E2BIG
) {
207 assert(inbytes
!= 0);
210 new_str
= pa_xrealloc(new_str
, len
);
219 char* pa_utf8_to_locale (const char *str
) {
220 return iconv_simple(str
, "", "UTF-8");
223 char* pa_locale_to_utf8 (const char *str
) {
224 return iconv_simple(str
, "UTF-8", "");
229 char* pa_utf8_to_locale (const char *str
) {
233 char* pa_locale_to_utf8 (const char *str
) {