]>
code.delx.au - pulseaudio/blob - src/pulse/utf8.c
2 This file is part of PulseAudio.
4 Copyright 2006 Lennart Poettering
5 Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
7 PulseAudio is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as
9 published by the Free Software Foundation; either version 2.1 of the
10 License, or (at your option) any later version.
12 PulseAudio is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with PulseAudio; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
23 /* This file is based on the GLIB utf8 validation functions. The
24 * original license text follows. */
26 /* gutf8.c - Operations on UTF-8 strings.
28 * Copyright (C) 1999 Tom Tromey
29 * Copyright (C) 2000 Red Hat, Inc.
31 * This library is free software; you can redistribute it and/or
32 * modify it under the terms of the GNU Lesser General Public
33 * License as published by the Free Software Foundation; either
34 * version 2 of the License, or (at your option) any later version.
36 * This library is distributed in the hope that it will be useful,
37 * but WITHOUT ANY WARRANTY; without even the implied warranty of
38 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
39 * Lesser General Public License for more details.
41 * You should have received a copy of the GNU Lesser General Public
42 * License along with this library; if not, write to the
43 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
44 * Boston, MA 02111-1307, USA.
60 #include <pulse/xmalloc.h>
61 #include <pulsecore/macro.h>
65 #define FILTER_CHAR '_'
67 static inline bool is_unicode_valid(uint32_t ch
) {
69 if (ch
>= 0x110000) /* End of unicode space */
71 if ((ch
& 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
73 if ((ch
>= 0xFDD0) && (ch
<= 0xFDEF)) /* Reserved */
75 if ((ch
& 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
81 static inline bool is_continuation_char(uint8_t ch
) {
82 if ((ch
& 0xc0) != 0x80) /* 10xxxxxx */
87 static inline void merge_continuation_char(uint32_t *u_ch
, uint8_t ch
) {
92 static char* utf8_validate(const char *str
, char *output
) {
95 const uint8_t *p
, *last
;
101 o
= (uint8_t*) output
;
102 for (p
= (const uint8_t*) str
; *p
; p
++) {
109 if ((*p
& 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
112 val
= (uint32_t) (*p
& 0x1e);
114 } else if ((*p
& 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
117 val
= (uint32_t) (*p
& 0x0f);
119 } else if ((*p
& 0xf8) == 0xf0) { /* 11110xxx four-char seq */
122 val
= (uint32_t) (*p
& 0x07);
127 if (!is_continuation_char(*p
))
129 merge_continuation_char(&val
, *p
);
133 if (!is_continuation_char(*p
))
135 merge_continuation_char(&val
, *p
);
139 if (!is_continuation_char(*p
))
141 merge_continuation_char(&val
, *p
);
146 if (!is_unicode_valid(val
))
150 memcpy(o
, last
, (size_t) size
);
159 p
= last
; /* We retry at the next character */
179 char* pa_utf8_valid (const char *str
) {
180 return utf8_validate(str
, NULL
);
183 char* pa_utf8_filter (const char *str
) {
187 new_str
= pa_xmalloc(strlen(str
) + 1);
188 return utf8_validate(str
, new_str
);
193 static char* iconv_simple(const char *str
, const char *to
, const char *from
) {
197 ICONV_CONST
char *inbuf
;
199 size_t res
, inbytes
, outbytes
;
205 cd
= iconv_open(to
, from
);
206 if (cd
== (iconv_t
)-1)
209 inlen
= len
= strlen(str
) + 1;
210 new_str
= pa_xmalloc(len
);
213 inbuf
= (ICONV_CONST
char*) str
; /* Brain dead prototype for iconv() */
218 res
= iconv(cd
, &inbuf
, &inbytes
, &outbuf
, &outbytes
);
220 if (res
!= (size_t)-1)
223 if (errno
!= E2BIG
) {
229 pa_assert(inbytes
!= 0);
232 new_str
= pa_xrealloc(new_str
, len
);
240 char* pa_utf8_to_locale (const char *str
) {
241 return iconv_simple(str
, "", "UTF-8");
244 char* pa_locale_to_utf8 (const char *str
) {
245 return iconv_simple(str
, "UTF-8", "");
250 char* pa_utf8_to_locale (const char *str
) {
253 return pa_ascii_filter(str
);
256 char* pa_locale_to_utf8 (const char *str
) {
259 if (pa_utf8_valid(str
))
260 return pa_xstrdup(str
);
267 char *pa_ascii_valid(const char *str
) {
271 for (p
= str
; *p
; p
++)
272 if ((unsigned char) *p
>= 128)
278 char *pa_ascii_filter(const char *str
) {
284 for (s
= r
, d
= r
; *s
; s
++)
285 if ((unsigned char) *s
< 128)