]> code.delx.au - pulseaudio/blob - src/pulse/utf8.c
channelmap, volume: Don't refer to bool in the public API documentation
[pulseaudio] / src / pulse / utf8.c
1 /***
2 This file is part of PulseAudio.
3
4 Copyright 2006 Lennart Poettering
5 Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
6
7 PulseAudio is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as
9 published by the Free Software Foundation; either version 2.1 of the
10 License, or (at your option) any later version.
11
12 PulseAudio is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with PulseAudio; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 USA.
21 ***/
22
23 /* This file is based on the GLIB utf8 validation functions. The
24 * original license text follows. */
25
26 /* gutf8.c - Operations on UTF-8 strings.
27 *
28 * Copyright (C) 1999 Tom Tromey
29 * Copyright (C) 2000 Red Hat, Inc.
30 *
31 * This library is free software; you can redistribute it and/or
32 * modify it under the terms of the GNU Lesser General Public
33 * License as published by the Free Software Foundation; either
34 * version 2 of the License, or (at your option) any later version.
35 *
36 * This library is distributed in the hope that it will be useful,
37 * but WITHOUT ANY WARRANTY; without even the implied warranty of
38 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
39 * Lesser General Public License for more details.
40 *
41 * You should have received a copy of the GNU Lesser General Public
42 * License along with this library; if not, write to the
43 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
44 * Boston, MA 02111-1307, USA.
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include <config.h>
49 #endif
50
51 #include <errno.h>
52 #include <stdlib.h>
53 #include <inttypes.h>
54 #include <string.h>
55
56 #ifdef HAVE_ICONV
57 #include <iconv.h>
58 #endif
59
60 #include <pulse/xmalloc.h>
61 #include <pulsecore/macro.h>
62
63 #include "utf8.h"
64
65 #define FILTER_CHAR '_'
66
/* Returns true if ch is a code point this file accepts as valid, false
 * otherwise. Rejected are: values at or above 0x110000, the UTF-16
 * surrogate range 0xD800..0xDFFF, the reserved range 0xFDD0..0xFDEF, and
 * every value whose low 16 bits are 0xFFFE or 0xFFFF. */
static inline bool is_unicode_valid(uint32_t ch) {
    const bool past_end = ch > 0x10FFFF;                    /* End of unicode space */
    const bool surrogate = ch >= 0xD800 && ch <= 0xDFFF;    /* Reserved area for UTF-16 */
    const bool reserved = ch >= 0xFDD0 && ch <= 0xFDEF;     /* Reserved */
    const bool ends_in_fffe_ffff = (ch & 0xFFFE) == 0xFFFE; /* xxFFFE and xxFFFF */

    return !(past_end || surrogate || reserved || ends_in_fffe_ffff);
}
80
/* Returns true if ch has the bit pattern 10xxxxxx, i.e. the form of a
 * UTF-8 continuation byte. */
static inline bool is_continuation_char(uint8_t ch) {
    return (ch & 0xc0) == 0x80;
}
86
/* Appends the six payload bits of the continuation byte ch to the
 * partially decoded value in *u_ch: the old value is shifted left by six
 * bits and the low six bits of ch are placed in the vacated positions. */
static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
    *u_ch = (*u_ch << 6) | (uint32_t) (ch & 0x3f);
}
91
92 static char* utf8_validate(const char *str, char *output) {
93 uint32_t val = 0;
94 uint32_t min = 0;
95 const uint8_t *p, *last;
96 int size;
97 uint8_t *o;
98
99 pa_assert(str);
100
101 o = (uint8_t*) output;
102 for (p = (const uint8_t*) str; *p; p++) {
103 if (*p < 128) {
104 if (o)
105 *o = *p;
106 } else {
107 last = p;
108
109 if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
110 size = 2;
111 min = 128;
112 val = (uint32_t) (*p & 0x1e);
113 goto ONE_REMAINING;
114 } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
115 size = 3;
116 min = (1 << 11);
117 val = (uint32_t) (*p & 0x0f);
118 goto TWO_REMAINING;
119 } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
120 size = 4;
121 min = (1 << 16);
122 val = (uint32_t) (*p & 0x07);
123 } else
124 goto error;
125
126 p++;
127 if (!is_continuation_char(*p))
128 goto error;
129 merge_continuation_char(&val, *p);
130
131 TWO_REMAINING:
132 p++;
133 if (!is_continuation_char(*p))
134 goto error;
135 merge_continuation_char(&val, *p);
136
137 ONE_REMAINING:
138 p++;
139 if (!is_continuation_char(*p))
140 goto error;
141 merge_continuation_char(&val, *p);
142
143 if (val < min)
144 goto error;
145
146 if (!is_unicode_valid(val))
147 goto error;
148
149 if (o) {
150 memcpy(o, last, (size_t) size);
151 o += size;
152 }
153
154 continue;
155
156 error:
157 if (o) {
158 *o = FILTER_CHAR;
159 p = last; /* We retry at the next character */
160 } else
161 goto failure;
162 }
163
164 if (o)
165 o++;
166 }
167
168 if (o) {
169 *o = '\0';
170 return output;
171 }
172
173 return (char*) str;
174
175 failure:
176 return NULL;
177 }
178
/* Checks whether str is valid UTF-8 by running utf8_validate() without an
 * output buffer. Returns str itself if it is valid, NULL otherwise. */
char* pa_utf8_valid (const char *str) {
    char *no_output = NULL;

    return utf8_validate(str, no_output);
}
182
/* Returns a copy of str, allocated with pa_xmalloc(), in which
 * utf8_validate() has replaced the invalid parts with FILTER_CHAR. The
 * buffer is sized strlen(str) + 1, which is the most the filter can
 * write. */
char* pa_utf8_filter (const char *str) {
    size_t capacity;

    pa_assert(str);

    capacity = strlen(str) + 1;

    return utf8_validate(str, pa_xmalloc(capacity));
}
190
191 #ifdef HAVE_ICONV
192
193 static char* iconv_simple(const char *str, const char *to, const char *from) {
194 char *new_str;
195 size_t len, inlen;
196 iconv_t cd;
197 ICONV_CONST char *inbuf;
198 char *outbuf;
199 size_t res, inbytes, outbytes;
200
201 pa_assert(str);
202 pa_assert(to);
203 pa_assert(from);
204
205 cd = iconv_open(to, from);
206 if (cd == (iconv_t)-1)
207 return NULL;
208
209 inlen = len = strlen(str) + 1;
210 new_str = pa_xmalloc(len);
211
212 for (;;) {
213 inbuf = (ICONV_CONST char*) str; /* Brain dead prototype for iconv() */
214 inbytes = inlen;
215 outbuf = new_str;
216 outbytes = len;
217
218 res = iconv(cd, &inbuf, &inbytes, &outbuf, &outbytes);
219
220 if (res != (size_t)-1)
221 break;
222
223 if (errno != E2BIG) {
224 pa_xfree(new_str);
225 new_str = NULL;
226 break;
227 }
228
229 pa_assert(inbytes != 0);
230
231 len += inbytes;
232 new_str = pa_xrealloc(new_str, len);
233 }
234
235 iconv_close(cd);
236
237 return new_str;
238 }
239
/* Converts str from UTF-8 via iconv_simple(). The target encoding name
 * handed to iconv_open() is the empty string -- presumably this selects
 * the charset of the current locale, as the function name suggests;
 * verify against the iconv implementation in use. Returns NULL on
 * failure. */
char* pa_utf8_to_locale (const char *str) {
    const char *source_charset = "UTF-8";
    const char *target_charset = "";

    return iconv_simple(str, target_charset, source_charset);
}
243
/* Converts str to UTF-8 via iconv_simple(). The source encoding name
 * handed to iconv_open() is the empty string -- presumably this selects
 * the charset of the current locale, as the function name suggests;
 * verify against the iconv implementation in use. Returns NULL on
 * failure. */
char* pa_locale_to_utf8 (const char *str) {
    const char *source_charset = "";
    const char *target_charset = "UTF-8";

    return iconv_simple(str, target_charset, source_charset);
}
247
248 #else
249
/* Fallback used when iconv is not available: no real conversion is done.
 * The result is whatever pa_ascii_filter() returns for str, i.e. a copy
 * with all non-ASCII bytes removed. */
char* pa_utf8_to_locale (const char *str) {
    char *ascii_only;

    pa_assert(str);

    ascii_only = pa_ascii_filter(str);
    return ascii_only;
}
255
/* Fallback used when iconv is not available: no real conversion is done.
 * If str already passes pa_utf8_valid() a pa_xstrdup() copy of it is
 * returned, otherwise NULL. */
char* pa_locale_to_utf8 (const char *str) {
    pa_assert(str);

    return pa_utf8_valid(str) ? pa_xstrdup(str) : NULL;
}
264
265 #endif
266
/* Checks that str contains only 7 bit ASCII. Returns str itself (cast to
 * non-const) if no byte has a value of 128 or above, NULL otherwise. */
char *pa_ascii_valid(const char *str) {
    const unsigned char *c;

    pa_assert(str);

    c = (const unsigned char*) str;

    while (*c) {
        if (*c & 0x80) /* High bit set, i.e. >= 128 */
            return NULL;

        c++;
    }

    return (char*) str;
}
277
/* Returns a pa_xstrdup() copy of str from which every byte with a value
 * of 128 or above has been removed. The copy is compacted in place: a
 * read cursor visits every byte, a write cursor only advances for bytes
 * that are kept. */
char *pa_ascii_filter(const char *str) {
    char *copy, *rd, *wr;

    pa_assert(str);

    copy = pa_xstrdup(str);
    rd = wr = copy;

    while (*rd) {
        if (!((unsigned char) *rd & 0x80))
            *wr++ = *rd;

        rd++;
    }

    *wr = 0;

    return copy;
}