Line data Source code
1 : /* utf8conv.c - UTF8 character set conversion
2 : * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
3 : * 2008, 2010 Free Software Foundation, Inc.
4 : *
5 : * This file is part of GnuPG.
6 : *
7 : * GnuPG is free software; you can redistribute it and/or modify it
8 : * under the terms of either
9 : *
10 : * - the GNU Lesser General Public License as published by the Free
11 : * Software Foundation; either version 3 of the License, or (at
12 : * your option) any later version.
13 : *
14 : * or
15 : *
16 : * - the GNU General Public License as published by the Free
17 : * Software Foundation; either version 2 of the License, or (at
18 : * your option) any later version.
19 : *
20 : * or both in parallel, as here.
21 : *
22 : * GnuPG is distributed in the hope that it will be useful, but
23 : * WITHOUT ANY WARRANTY; without even the implied warranty of
24 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 : * General Public License for more details.
26 : *
27 : * You should have received copies of the GNU General Public License
28 : * and the GNU Lesser General Public License along with this program;
29 : * if not, see <http://www.gnu.org/licenses/>.
30 : */
31 :
32 : #include <config.h>
33 : #include <stdlib.h>
34 : #include <string.h>
35 : #include <stdarg.h>
36 : #include <ctype.h>
37 : #ifdef HAVE_LANGINFO_CODESET
38 : #include <langinfo.h>
39 : #endif
40 : #include <errno.h>
41 : #ifndef HAVE_ANDROID_SYSTEM
42 : # include <iconv.h>
43 : #endif
44 :
45 : #include "util.h"
46 : #include "common-defs.h"
47 : #include "i18n.h"
48 : #include "stringhelp.h"
49 : #include "utf8conv.h"
50 :
51 : #ifndef MB_LEN_MAX
52 : #define MB_LEN_MAX 16
53 : #endif
54 :
/* Name of the currently selected native character set. */
static const char *active_charset_name = "iso-8859-1";

/* Set to true if strings are simply passed through (native is UTF-8). */
static int no_translation = 0;

/* Set to true if the iconv conversion functions are required. */
static int use_iconv = 0;
58 :
59 :
60 : #ifdef HAVE_ANDROID_SYSTEM
61 : /* Fake stuff to get things building. */
62 : typedef void *iconv_t;
63 : #define ICONV_CONST
64 :
65 : static iconv_t
66 : iconv_open (const char *tocode, const char *fromcode)
67 : {
68 : (void)tocode;
69 : (void)fromcode;
70 : return (iconv_t)(-1);
71 : }
72 :
73 : static size_t
74 : iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
75 : char **outbuf, size_t *outbytesleft)
76 : {
77 : (void)cd;
78 : (void)inbuf;
79 : (void)inbytesleft;
80 : (void)outbuf;
81 : (void)outbytesleft;
82 : return (size_t)(0);
83 : }
84 :
85 : static int
86 : iconv_close (iconv_t cd)
87 : {
88 : (void)cd;
89 : return 0;
90 : }
91 : #endif /*HAVE_ANDROID_SYSTEM*/
92 :
93 :
94 : /* Error handler for iconv failures. This is needed to not clutter the
95 : output with repeated diagnostics about a missing conversion. */
96 : static void
97 0 : handle_iconv_error (const char *to, const char *from, int use_fallback)
98 : {
99 0 : if (errno == EINVAL)
100 : {
101 : static int shown1, shown2;
102 : int x;
103 :
104 0 : if (to && !strcmp (to, "utf-8"))
105 : {
106 0 : x = shown1;
107 0 : shown1 = 1;
108 : }
109 : else
110 : {
111 0 : x = shown2;
112 0 : shown2 = 1;
113 : }
114 :
115 0 : if (!x)
116 0 : log_info (_("conversion from '%s' to '%s' not available\n"),
117 : from, to);
118 : }
119 : else
120 : {
121 : static int shown;
122 :
123 0 : if (!shown)
124 0 : log_info (_("iconv_open failed: %s\n"), strerror (errno));
125 0 : shown = 1;
126 : }
127 :
128 0 : if (use_fallback)
129 : {
130 : /* To avoid further error messages we fallback to UTF-8 for the
131 : native encoding. Nowadays this seems to be the best bet in
132 : case of errors from iconv or nl_langinfo. */
133 0 : active_charset_name = "utf-8";
134 0 : no_translation = 0;
135 0 : use_iconv = 0;
136 : }
137 0 : }
138 :
139 :
140 :
141 : int
142 1349 : set_native_charset (const char *newset)
143 : {
144 : const char *full_newset;
145 :
146 1349 : if (!newset)
147 : {
148 : #ifdef HAVE_ANDROID_SYSTEM
149 : newset = "utf-8";
150 : #elif defined HAVE_W32_SYSTEM
151 : static char codepage[30];
152 : unsigned int cpno;
153 : const char *aliases;
154 :
155 : /* We are a console program thus we need to use the
156 : GetConsoleOutputCP function and not the the GetACP which
157 : would give the codepage for a GUI program. Note this is not
158 : a bulletproof detection because GetConsoleCP might return a
159 : different one for console input. Not sure how to cope with
160 : that. If the console Code page is not known we fall back to
161 : the system code page. */
162 : #ifndef HAVE_W32CE_SYSTEM
163 : cpno = GetConsoleOutputCP ();
164 : if (!cpno)
165 : #endif
166 : cpno = GetACP ();
167 : sprintf (codepage, "CP%u", cpno );
168 : /* Resolve alias. We use a long string string and not the usual
169 : array to optimize if the code is taken to a DSO. Taken from
170 : libiconv 1.9.2. */
171 : newset = codepage;
172 : for (aliases = ("CP936" "\0" "GBK" "\0"
173 : "CP1361" "\0" "JOHAB" "\0"
174 : "CP20127" "\0" "ASCII" "\0"
175 : "CP20866" "\0" "KOI8-R" "\0"
176 : "CP21866" "\0" "KOI8-RU" "\0"
177 : "CP28591" "\0" "ISO-8859-1" "\0"
178 : "CP28592" "\0" "ISO-8859-2" "\0"
179 : "CP28593" "\0" "ISO-8859-3" "\0"
180 : "CP28594" "\0" "ISO-8859-4" "\0"
181 : "CP28595" "\0" "ISO-8859-5" "\0"
182 : "CP28596" "\0" "ISO-8859-6" "\0"
183 : "CP28597" "\0" "ISO-8859-7" "\0"
184 : "CP28598" "\0" "ISO-8859-8" "\0"
185 : "CP28599" "\0" "ISO-8859-9" "\0"
186 : "CP28605" "\0" "ISO-8859-15" "\0"
187 : "CP65001" "\0" "UTF-8" "\0");
188 : *aliases;
189 : aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
190 : {
191 : if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
192 : {
193 : newset = aliases + strlen (aliases) + 1;
194 : break;
195 : }
196 : }
197 :
198 : #else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
199 :
200 : #ifdef HAVE_LANGINFO_CODESET
201 1349 : newset = nl_langinfo (CODESET);
202 : #else /*!HAVE_LANGINFO_CODESET*/
203 : /* Try to get the used charset from environment variables. */
204 : static char codepage[30];
205 : const char *lc, *dot, *mod;
206 :
207 : strcpy (codepage, "iso-8859-1");
208 : lc = getenv ("LC_ALL");
209 : if (!lc || !*lc)
210 : {
211 : lc = getenv ("LC_CTYPE");
212 : if (!lc || !*lc)
213 : lc = getenv ("LANG");
214 : }
215 : if (lc && *lc)
216 : {
217 : dot = strchr (lc, '.');
218 : if (dot)
219 : {
220 : mod = strchr (++dot, '@');
221 : if (!mod)
222 : mod = dot + strlen (dot);
223 : if (mod - dot < sizeof codepage && dot != mod)
224 : {
225 : memcpy (codepage, dot, mod - dot);
226 : codepage [mod - dot] = 0;
227 : }
228 : }
229 : }
230 : newset = codepage;
231 : #endif /*!HAVE_LANGINFO_CODESET*/
232 : #endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
233 : }
234 :
235 1349 : full_newset = newset;
236 1349 : if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
237 : {
238 0 : newset += 3;
239 0 : if (*newset == '-' || *newset == '_')
240 0 : newset++;
241 : }
242 :
243 : /* Note that we silently assume that plain ASCII is actually meant
244 : as Latin-1. This makes sense because many Unix system don't have
245 : their locale set up properly and thus would get annoying error
246 : messages and we have to handle all the "bug" reports. Latin-1 has
247 : always been the character set used for 8 bit characters on Unix
248 : systems. */
249 1349 : if ( !*newset
250 1349 : || !ascii_strcasecmp (newset, "8859-1" )
251 1349 : || !ascii_strcasecmp (newset, "646" )
252 1349 : || !ascii_strcasecmp (newset, "ASCII" )
253 1349 : || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
254 : )
255 : {
256 3 : active_charset_name = "iso-8859-1";
257 3 : no_translation = 0;
258 3 : use_iconv = 0;
259 : }
260 1346 : else if ( !ascii_strcasecmp (newset, "utf8" )
261 1346 : || !ascii_strcasecmp(newset, "utf-8") )
262 : {
263 1346 : active_charset_name = "utf-8";
264 1346 : no_translation = 1;
265 1346 : use_iconv = 0;
266 : }
267 : else
268 : {
269 : iconv_t cd;
270 :
271 0 : cd = iconv_open (full_newset, "utf-8");
272 0 : if (cd == (iconv_t)-1)
273 : {
274 0 : handle_iconv_error (full_newset, "utf-8", 0);
275 0 : return -1;
276 : }
277 0 : iconv_close (cd);
278 0 : cd = iconv_open ("utf-8", full_newset);
279 0 : if (cd == (iconv_t)-1)
280 : {
281 0 : handle_iconv_error ("utf-8", full_newset, 0);
282 0 : return -1;
283 : }
284 0 : iconv_close (cd);
285 0 : active_charset_name = full_newset;
286 0 : no_translation = 0;
287 0 : use_iconv = 1;
288 : }
289 1349 : return 0;
290 : }
291 :
292 : const char *
293 0 : get_native_charset ()
294 : {
295 0 : return active_charset_name;
296 : }
297 :
298 : /* Return true if the native charset is utf-8. */
299 : int
300 0 : is_native_utf8 (void)
301 : {
302 0 : return no_translation;
303 : }
304 :
305 :
306 : /* Convert string, which is in native encoding to UTF8 and return a
307 : new allocated UTF-8 string. This function terminates the process
308 : on memory shortage. */
309 : char *
310 876 : native_to_utf8 (const char *orig_string)
311 : {
312 876 : const unsigned char *string = (const unsigned char *)orig_string;
313 : const unsigned char *s;
314 : char *buffer;
315 : unsigned char *p;
316 876 : size_t length = 0;
317 :
318 876 : if (no_translation)
319 : {
320 : /* Already utf-8 encoded. */
321 876 : buffer = xstrdup (orig_string);
322 : }
323 0 : else if (!use_iconv)
324 : {
325 : /* For Latin-1 we can avoid the iconv overhead. */
326 0 : for (s = string; *s; s++)
327 : {
328 0 : length++;
329 0 : if (*s & 0x80)
330 0 : length++;
331 : }
332 0 : buffer = xmalloc (length + 1);
333 0 : for (p = (unsigned char *)buffer, s = string; *s; s++)
334 : {
335 0 : if ( (*s & 0x80 ))
336 : {
337 0 : *p++ = 0xc0 | ((*s >> 6) & 3);
338 0 : *p++ = 0x80 | (*s & 0x3f);
339 : }
340 : else
341 0 : *p++ = *s;
342 : }
343 0 : *p = 0;
344 : }
345 : else
346 : {
347 : /* Need to use iconv. */
348 : iconv_t cd;
349 : const char *inptr;
350 : char *outptr;
351 : size_t inbytes, outbytes;
352 :
353 0 : cd = iconv_open ("utf-8", active_charset_name);
354 0 : if (cd == (iconv_t)-1)
355 : {
356 0 : handle_iconv_error ("utf-8", active_charset_name, 1);
357 0 : return native_to_utf8 (string);
358 : }
359 :
360 0 : for (s=string; *s; s++ )
361 : {
362 0 : length++;
363 0 : if ((*s & 0x80))
364 0 : length += 5; /* We may need up to 6 bytes for the utf8 output. */
365 : }
366 0 : buffer = xmalloc (length + 1);
367 :
368 0 : inptr = string;
369 0 : inbytes = strlen (string);
370 0 : outptr = buffer;
371 0 : outbytes = length;
372 0 : if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
373 : &outptr, &outbytes) == (size_t)-1)
374 : {
375 : static int shown;
376 :
377 0 : if (!shown)
378 0 : log_info (_("conversion from '%s' to '%s' failed: %s\n"),
379 0 : active_charset_name, "utf-8", strerror (errno));
380 0 : shown = 1;
381 : /* We don't do any conversion at all but use the strings as is. */
382 0 : strcpy (buffer, string);
383 : }
384 : else /* Success. */
385 : {
386 0 : *outptr = 0;
387 : /* We could realloc the buffer now but I doubt that it makes
388 : much sense given that it will get freed anyway soon
389 : after. */
390 : }
391 0 : iconv_close (cd);
392 : }
393 876 : return buffer;
394 : }
395 :
396 :
397 :
398 : static char *
399 671 : do_utf8_to_native (const char *string, size_t length, int delim,
400 : int with_iconv)
401 : {
402 : int nleft;
403 : int i;
404 : unsigned char encbuf[8];
405 : int encidx;
406 : const unsigned char *s;
407 : size_t n;
408 671 : char *buffer = NULL;
409 671 : char *p = NULL;
410 671 : unsigned long val = 0;
411 : size_t slen;
412 671 : int resync = 0;
413 :
414 : /* First pass (p==NULL): count the extended utf-8 characters. */
415 : /* Second pass (p!=NULL): create string. */
416 : for (;;)
417 : {
418 47472 : for (slen = length, nleft = encidx = 0, n = 0,
419 1342 : s = (const unsigned char *)string;
420 : slen;
421 43446 : s++, slen--)
422 : {
423 43446 : if (resync)
424 : {
425 0 : if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
426 : {
427 : /* Still invalid. */
428 0 : if (p)
429 : {
430 0 : sprintf (p, "\\x%02x", *s);
431 0 : p += 4;
432 : }
433 0 : n += 4;
434 0 : continue;
435 : }
436 0 : resync = 0;
437 : }
438 43446 : if (!nleft)
439 : {
440 43446 : if (!(*s & 0x80))
441 : {
442 : /* Plain ascii. */
443 43446 : if ( delim != -1
444 43446 : && (*s < 0x20 || *s == 0x7f || *s == delim
445 43446 : || (delim && *s == '\\')))
446 : {
447 0 : n++;
448 0 : if (p)
449 0 : *p++ = '\\';
450 0 : switch (*s)
451 : {
452 0 : case '\n': n++; if ( p ) *p++ = 'n'; break;
453 0 : case '\r': n++; if ( p ) *p++ = 'r'; break;
454 0 : case '\f': n++; if ( p ) *p++ = 'f'; break;
455 0 : case '\v': n++; if ( p ) *p++ = 'v'; break;
456 0 : case '\b': n++; if ( p ) *p++ = 'b'; break;
457 0 : case 0: n++; if ( p ) *p++ = '0'; break;
458 : default:
459 0 : n += 3;
460 0 : if (p)
461 : {
462 0 : sprintf (p, "x%02x", *s);
463 0 : p += 3;
464 : }
465 0 : break;
466 : }
467 0 : }
468 : else
469 : {
470 43446 : if (p)
471 21723 : *p++ = *s;
472 43446 : n++;
473 : }
474 : }
475 0 : else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
476 : {
477 0 : val = *s & 0x1f;
478 0 : nleft = 1;
479 0 : encidx = 0;
480 0 : encbuf[encidx++] = *s;
481 : }
482 0 : else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
483 : {
484 0 : val = *s & 0x0f;
485 0 : nleft = 2;
486 0 : encidx = 0;
487 0 : encbuf[encidx++] = *s;
488 : }
489 0 : else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
490 : {
491 0 : val = *s & 0x07;
492 0 : nleft = 3;
493 0 : encidx = 0;
494 0 : encbuf[encidx++] = *s;
495 : }
496 0 : else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
497 : {
498 0 : val = *s & 0x03;
499 0 : nleft = 4;
500 0 : encidx = 0;
501 0 : encbuf[encidx++] = *s;
502 : }
503 0 : else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
504 : {
505 0 : val = *s & 0x01;
506 0 : nleft = 5;
507 0 : encidx = 0;
508 0 : encbuf[encidx++] = *s;
509 : }
510 : else /* Invalid encoding: print as \xNN. */
511 : {
512 0 : if (p)
513 : {
514 0 : sprintf (p, "\\x%02x", *s);
515 0 : p += 4;
516 : }
517 0 : n += 4;
518 0 : resync = 1;
519 : }
520 : }
521 0 : else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
522 : {
523 0 : if (p)
524 : {
525 0 : for (i = 0; i < encidx; i++)
526 : {
527 0 : sprintf (p, "\\x%02x", encbuf[i]);
528 0 : p += 4;
529 : }
530 0 : sprintf (p, "\\x%02x", *s);
531 0 : p += 4;
532 : }
533 0 : n += 4 + 4 * encidx;
534 0 : nleft = 0;
535 0 : encidx = 0;
536 0 : resync = 1;
537 : }
538 : else
539 : {
540 0 : encbuf[encidx++] = *s;
541 0 : val <<= 6;
542 0 : val |= *s & 0x3f;
543 0 : if (!--nleft) /* Ready. */
544 : {
545 0 : if (no_translation)
546 : {
547 0 : if (p)
548 : {
549 0 : for (i = 0; i < encidx; i++)
550 0 : *p++ = encbuf[i];
551 : }
552 0 : n += encidx;
553 0 : encidx = 0;
554 : }
555 0 : else if (with_iconv)
556 : {
557 : /* Our strategy for using iconv is a bit strange
558 : but it better keeps compatibility with
559 : previous versions in regard to how invalid
560 : encodings are displayed. What we do is to
561 : keep the utf-8 as is and have the real
562 : translation step then at the end. Yes, I
563 : know that this is ugly. However we are short
564 : of the 1.4 release and for this branch we
565 : should not mess too much around with iconv
566 : things. One reason for this is that we don't
567 : know enough about non-GNU iconv
568 : implementation and want to minimize the risk
569 : of breaking the code on too many platforms. */
570 0 : if ( p )
571 : {
572 0 : for (i=0; i < encidx; i++ )
573 0 : *p++ = encbuf[i];
574 : }
575 0 : n += encidx;
576 0 : encidx = 0;
577 : }
578 : else /* Latin-1 case. */
579 : {
580 0 : if (val >= 0x80 && val < 256)
581 : {
582 : /* We can simply print this character */
583 0 : n++;
584 0 : if (p)
585 0 : *p++ = val;
586 : }
587 : else
588 : {
589 : /* We do not have a translation: print utf8. */
590 0 : if (p)
591 : {
592 0 : for (i = 0; i < encidx; i++)
593 : {
594 0 : sprintf (p, "\\x%02x", encbuf[i]);
595 0 : p += 4;
596 : }
597 : }
598 0 : n += encidx * 4;
599 0 : encidx = 0;
600 : }
601 : }
602 : }
603 :
604 : }
605 : }
606 1342 : if (!buffer)
607 : {
608 : /* Allocate the buffer after the first pass. */
609 671 : buffer = p = xmalloc (n + 1);
610 : }
611 671 : else if (with_iconv)
612 : {
613 : /* Note: See above for comments. */
614 : iconv_t cd;
615 : const char *inptr;
616 : char *outbuf, *outptr;
617 : size_t inbytes, outbytes;
618 :
619 0 : *p = 0; /* Terminate the buffer. */
620 :
621 0 : cd = iconv_open (active_charset_name, "utf-8");
622 0 : if (cd == (iconv_t)-1)
623 : {
624 0 : handle_iconv_error (active_charset_name, "utf-8", 1);
625 0 : xfree (buffer);
626 0 : return utf8_to_native (string, length, delim);
627 : }
628 :
629 : /* Allocate a new buffer large enough to hold all possible
630 : encodings. */
631 0 : n = p - buffer + 1;
632 0 : inbytes = n - 1;;
633 0 : inptr = buffer;
634 0 : outbytes = n * MB_LEN_MAX;
635 0 : if (outbytes / MB_LEN_MAX != n)
636 0 : BUG (); /* Actually an overflow. */
637 0 : outbuf = outptr = xmalloc (outbytes);
638 0 : if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
639 : &outptr, &outbytes) == (size_t)-1)
640 : {
641 : static int shown;
642 :
643 0 : if (!shown)
644 0 : log_info (_("conversion from '%s' to '%s' failed: %s\n"),
645 0 : "utf-8", active_charset_name, strerror (errno));
646 0 : shown = 1;
647 : /* Didn't worked out. Try again but without iconv. */
648 0 : xfree (buffer);
649 0 : buffer = NULL;
650 0 : xfree (outbuf);
651 0 : outbuf = do_utf8_to_native (string, length, delim, 0);
652 : }
653 : else /* Success. */
654 : {
655 0 : *outptr = 0; /* Make sure it is a string. */
656 : /* We could realloc the buffer now but I doubt that it
657 : makes much sense given that it will get freed
658 : anyway soon after. */
659 0 : xfree (buffer);
660 : }
661 0 : iconv_close (cd);
662 0 : return outbuf;
663 : }
664 : else /* Not using iconv. */
665 : {
666 671 : *p = 0; /* Make sure it is a string. */
667 671 : return buffer;
668 : }
669 671 : }
670 : }
671 :
672 : /* Convert string, which is in UTF-8 to native encoding. Replace
673 : illegal encodings by some "\xnn" and quote all control
674 : characters. A character with value DELIM will always be quoted, it
675 : must be a vanilla ASCII character. A DELIM value of -1 is special:
676 : it disables all quoting of control characters. This function
677 : terminates the process on memory shortage. */
678 : char *
679 671 : utf8_to_native (const char *string, size_t length, int delim)
680 : {
681 671 : return do_utf8_to_native (string, length, delim, use_iconv);
682 : }
683 :
684 :
685 :
686 :
687 : /* Wrapper function for iconv_open, required for W32 as we dlopen that
688 : library on that system. */
689 : jnlib_iconv_t
690 0 : jnlib_iconv_open (const char *tocode, const char *fromcode)
691 : {
692 0 : return (jnlib_iconv_t)iconv_open (tocode, fromcode);
693 : }
694 :
695 :
696 : /* Wrapper function for iconv, required for W32 as we dlopen that
697 : library on that system. */
698 : size_t
699 0 : jnlib_iconv (jnlib_iconv_t cd,
700 : const char **inbuf, size_t *inbytesleft,
701 : char **outbuf, size_t *outbytesleft)
702 : {
703 0 : return iconv ((iconv_t)cd, (char**)inbuf, inbytesleft, outbuf, outbytesleft);
704 : }
705 :
706 : /* Wrapper function for iconv_close, required for W32 as we dlopen that
707 : library on that system. */
708 : int
709 0 : jnlib_iconv_close (jnlib_iconv_t cd)
710 : {
711 0 : return iconv_close ((iconv_t)cd);
712 : }
713 :
714 :
715 : #ifdef HAVE_W32_SYSTEM
716 : /* Return a malloced string encoded in UTF-8 from the wide char input
717 : string STRING. Caller must free this value. Returns NULL and sets
718 : ERRNO on failure. Calling this function with STRING set to NULL is
719 : not defined. */
720 : char *
721 : wchar_to_utf8 (const wchar_t *string)
722 : {
723 : int n;
724 : char *result;
725 :
726 : n = WideCharToMultiByte (CP_UTF8, 0, string, -1, NULL, 0, NULL, NULL);
727 : if (n < 0)
728 : {
729 : gpg_err_set_errno (EINVAL);
730 : return NULL;
731 : }
732 :
733 : result = xtrymalloc (n+1);
734 : if (!result)
735 : return NULL;
736 :
737 : n = WideCharToMultiByte (CP_UTF8, 0, string, -1, result, n, NULL, NULL);
738 : if (n < 0)
739 : {
740 : xfree (result);
741 : gpg_err_set_errno (EINVAL);
742 : result = NULL;
743 : }
744 : return result;
745 : }
746 :
747 :
748 : /* Return a malloced wide char string from an UTF-8 encoded input
749 : string STRING. Caller must free this value. Returns NULL and sets
750 : ERRNO on failure. Calling this function with STRING set to NULL is
751 : not defined. */
752 : wchar_t *
753 : utf8_to_wchar (const char *string)
754 : {
755 : int n;
756 : size_t nbytes;
757 : wchar_t *result;
758 :
759 : n = MultiByteToWideChar (CP_UTF8, 0, string, -1, NULL, 0);
760 : if (n < 0)
761 : {
762 : gpg_err_set_errno (EINVAL);
763 : return NULL;
764 : }
765 :
766 : nbytes = (size_t)(n+1) * sizeof(*result);
767 : if (nbytes / sizeof(*result) != (n+1))
768 : {
769 : gpg_err_set_errno (ENOMEM);
770 : return NULL;
771 : }
772 : result = xtrymalloc (nbytes);
773 : if (!result)
774 : return NULL;
775 :
776 : n = MultiByteToWideChar (CP_UTF8, 0, string, -1, result, n);
777 : if (n < 0)
778 : {
779 : xfree (result);
780 : gpg_err_set_errno (EINVAL);
781 : result = NULL;
782 : }
783 : return result;
784 : }
785 : #endif /*HAVE_W32_SYSTEM*/
|