Line data Source code
1 : /* utf8conv.c - UTF8 character set conversion
2 : * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
3 : * 2008, 2010 Free Software Foundation, Inc.
4 : *
5 : * This file is part of GnuPG.
6 : *
7 : * GnuPG is free software; you can redistribute it and/or modify it
8 : * under the terms of either
9 : *
10 : * - the GNU Lesser General Public License as published by the Free
11 : * Software Foundation; either version 3 of the License, or (at
12 : * your option) any later version.
13 : *
14 : * or
15 : *
16 : * - the GNU General Public License as published by the Free
17 : * Software Foundation; either version 2 of the License, or (at
18 : * your option) any later version.
19 : *
20 : * or both in parallel, as here.
21 : *
22 : * GnuPG is distributed in the hope that it will be useful, but
23 : * WITHOUT ANY WARRANTY; without even the implied warranty of
24 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 : * General Public License for more details.
26 : *
27 : * You should have received a copies of the GNU General Public License
28 : * and the GNU Lesser General Public License along with this program;
29 : * if not, see <https://www.gnu.org/licenses/>.
30 : */
31 :
32 : #include <config.h>
33 : #include <stdlib.h>
34 : #include <string.h>
35 : #include <stdarg.h>
36 : #include <ctype.h>
37 : #ifdef HAVE_LANGINFO_CODESET
38 : #include <langinfo.h>
39 : #endif
40 : #include <errno.h>
41 :
42 : #if HAVE_W32_SYSTEM
43 : # /* Tell libgpg-error to provide the iconv macros. */
44 : # define GPGRT_ENABLE_W32_ICONV_MACROS 1
45 : #elif HAVE_ANDROID_SYSTEM
46 : # /* No iconv support. */
47 : #else
48 : # include <iconv.h>
49 : #endif
50 :
51 :
52 : #include "util.h"
53 : #include "common-defs.h"
54 : #include "i18n.h"
55 : #include "stringhelp.h"
56 : #include "utf8conv.h"
57 :
/* Fallback for systems whose headers did not provide MB_LEN_MAX.  The
   value is used to size the output buffer of iconv conversions.  */
#if !defined (MB_LEN_MAX)
# define MB_LEN_MAX 16
#endif

/* Name of the charset currently treated as the native one.  */
static const char *active_charset_name = "iso-8859-1";

/* True if strings are simply passed through without translation.  */
static int no_translation = 0;

/* True if the iconv conversion functions are required.  */
static int use_iconv = 0;
65 :
66 :
#ifdef HAVE_ANDROID_SYSTEM
/* There is no iconv on Android.  The stand-ins below only exist to
   get this file building: opening a conversion always fails, so code
   which checks the result of iconv_open never goes on to call the
   other two functions.  */
typedef void *iconv_t;
#define ICONV_CONST

/* Stub: always report that the requested conversion is unavailable.  */
static iconv_t
iconv_open (const char *tocode, const char *fromcode)
{
  (void)fromcode;
  (void)tocode;

  return (iconv_t)(-1);
}

/* Stub: touch none of the arguments and return 0.  */
static size_t
iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  (void)outbytesleft;
  (void)outbuf;
  (void)inbytesleft;
  (void)inbuf;
  (void)cd;

  return (size_t)(0);
}

/* Stub: there is nothing to release; always return 0.  */
static int
iconv_close (iconv_t cd)
{
  (void)cd;

  return 0;
}
#endif /*HAVE_ANDROID_SYSTEM*/
99 :
100 :
101 : /* Error handler for iconv failures. This is needed to not clutter the
102 : output with repeated diagnostics about a missing conversion. */
103 : static void
104 0 : handle_iconv_error (const char *to, const char *from, int use_fallback)
105 : {
106 0 : if (errno == EINVAL)
107 : {
108 : static int shown1, shown2;
109 : int x;
110 :
111 0 : if (to && !strcmp (to, "utf-8"))
112 : {
113 0 : x = shown1;
114 0 : shown1 = 1;
115 : }
116 : else
117 : {
118 0 : x = shown2;
119 0 : shown2 = 1;
120 : }
121 :
122 0 : if (!x)
123 0 : log_info (_("conversion from '%s' to '%s' not available\n"),
124 : from, to);
125 : }
126 : else
127 : {
128 : static int shown;
129 :
130 0 : if (!shown)
131 0 : log_info (_("iconv_open failed: %s\n"), strerror (errno));
132 0 : shown = 1;
133 : }
134 :
135 0 : if (use_fallback)
136 : {
137 : /* To avoid further error messages we fallback to UTF-8 for the
138 : native encoding. Nowadays this seems to be the best bet in
139 : case of errors from iconv or nl_langinfo. */
140 0 : active_charset_name = "utf-8";
141 0 : no_translation = 0;
142 0 : use_iconv = 0;
143 : }
144 0 : }
145 :
146 :
147 :
148 : int
149 1718 : set_native_charset (const char *newset)
150 : {
151 : const char *full_newset;
152 :
153 1718 : if (!newset)
154 : {
155 : #ifdef HAVE_ANDROID_SYSTEM
156 : newset = "utf-8";
157 : #elif defined HAVE_W32_SYSTEM
158 : static char codepage[30];
159 : unsigned int cpno;
160 : const char *aliases;
161 :
162 : /* We are a console program thus we need to use the
163 : GetConsoleOutputCP function and not the the GetACP which
164 : would give the codepage for a GUI program. Note this is not
165 : a bulletproof detection because GetConsoleCP might return a
166 : different one for console input. Not sure how to cope with
167 : that. If the console Code page is not known we fall back to
168 : the system code page. */
169 : #ifndef HAVE_W32CE_SYSTEM
170 : cpno = GetConsoleOutputCP ();
171 : if (!cpno)
172 : #endif
173 : cpno = GetACP ();
174 : sprintf (codepage, "CP%u", cpno );
175 : /* Resolve alias. We use a long string string and not the usual
176 : array to optimize if the code is taken to a DSO. Taken from
177 : libiconv 1.9.2. */
178 : newset = codepage;
179 : for (aliases = ("CP936" "\0" "GBK" "\0"
180 : "CP1361" "\0" "JOHAB" "\0"
181 : "CP20127" "\0" "ASCII" "\0"
182 : "CP20866" "\0" "KOI8-R" "\0"
183 : "CP21866" "\0" "KOI8-RU" "\0"
184 : "CP28591" "\0" "ISO-8859-1" "\0"
185 : "CP28592" "\0" "ISO-8859-2" "\0"
186 : "CP28593" "\0" "ISO-8859-3" "\0"
187 : "CP28594" "\0" "ISO-8859-4" "\0"
188 : "CP28595" "\0" "ISO-8859-5" "\0"
189 : "CP28596" "\0" "ISO-8859-6" "\0"
190 : "CP28597" "\0" "ISO-8859-7" "\0"
191 : "CP28598" "\0" "ISO-8859-8" "\0"
192 : "CP28599" "\0" "ISO-8859-9" "\0"
193 : "CP28605" "\0" "ISO-8859-15" "\0"
194 : "CP65001" "\0" "UTF-8" "\0");
195 : *aliases;
196 : aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
197 : {
198 : if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
199 : {
200 : newset = aliases + strlen (aliases) + 1;
201 : break;
202 : }
203 : }
204 :
205 : #else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
206 :
207 : #ifdef HAVE_LANGINFO_CODESET
208 1718 : newset = nl_langinfo (CODESET);
209 : #else /*!HAVE_LANGINFO_CODESET*/
210 : /* Try to get the used charset from environment variables. */
211 : static char codepage[30];
212 : const char *lc, *dot, *mod;
213 :
214 : strcpy (codepage, "iso-8859-1");
215 : lc = getenv ("LC_ALL");
216 : if (!lc || !*lc)
217 : {
218 : lc = getenv ("LC_CTYPE");
219 : if (!lc || !*lc)
220 : lc = getenv ("LANG");
221 : }
222 : if (lc && *lc)
223 : {
224 : dot = strchr (lc, '.');
225 : if (dot)
226 : {
227 : mod = strchr (++dot, '@');
228 : if (!mod)
229 : mod = dot + strlen (dot);
230 : if (mod - dot < sizeof codepage && dot != mod)
231 : {
232 : memcpy (codepage, dot, mod - dot);
233 : codepage [mod - dot] = 0;
234 : }
235 : }
236 : }
237 : newset = codepage;
238 : #endif /*!HAVE_LANGINFO_CODESET*/
239 : #endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
240 : }
241 :
242 1718 : full_newset = newset;
243 1718 : if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
244 : {
245 0 : newset += 3;
246 0 : if (*newset == '-' || *newset == '_')
247 0 : newset++;
248 : }
249 :
250 : /* Note that we silently assume that plain ASCII is actually meant
251 : as Latin-1. This makes sense because many Unix system don't have
252 : their locale set up properly and thus would get annoying error
253 : messages and we have to handle all the "bug" reports. Latin-1 has
254 : traditionally been the character set used for 8 bit characters on
255 : Unix systems. */
256 1718 : if ( !*newset
257 1718 : || !ascii_strcasecmp (newset, "8859-1" )
258 1718 : || !ascii_strcasecmp (newset, "646" )
259 1718 : || !ascii_strcasecmp (newset, "ASCII" )
260 1718 : || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
261 : )
262 : {
263 1717 : active_charset_name = "iso-8859-1";
264 1717 : no_translation = 0;
265 1717 : use_iconv = 0;
266 : }
267 1 : else if ( !ascii_strcasecmp (newset, "utf8" )
268 1 : || !ascii_strcasecmp(newset, "utf-8") )
269 : {
270 1 : active_charset_name = "utf-8";
271 1 : no_translation = 1;
272 1 : use_iconv = 0;
273 : }
274 : else
275 : {
276 : iconv_t cd;
277 :
278 0 : cd = iconv_open (full_newset, "utf-8");
279 0 : if (cd == (iconv_t)-1)
280 : {
281 0 : handle_iconv_error (full_newset, "utf-8", 0);
282 0 : return -1;
283 : }
284 0 : iconv_close (cd);
285 0 : cd = iconv_open ("utf-8", full_newset);
286 0 : if (cd == (iconv_t)-1)
287 : {
288 0 : handle_iconv_error ("utf-8", full_newset, 0);
289 0 : return -1;
290 : }
291 0 : iconv_close (cd);
292 0 : active_charset_name = full_newset;
293 0 : no_translation = 0;
294 0 : use_iconv = 1;
295 : }
296 1718 : return 0;
297 : }
298 :
299 : const char *
300 0 : get_native_charset ()
301 : {
302 0 : return active_charset_name;
303 : }
304 :
305 : /* Return true if the native charset is utf-8. */
306 : int
307 20 : is_native_utf8 (void)
308 : {
309 20 : return no_translation;
310 : }
311 :
312 :
313 : /* Convert string, which is in native encoding to UTF8 and return a
314 : new allocated UTF-8 string. This function terminates the process
315 : on memory shortage. */
316 : char *
317 883 : native_to_utf8 (const char *orig_string)
318 : {
319 883 : const unsigned char *string = (const unsigned char *)orig_string;
320 : const unsigned char *s;
321 : char *buffer;
322 : unsigned char *p;
323 883 : size_t length = 0;
324 :
325 883 : if (no_translation)
326 : {
327 : /* Already utf-8 encoded. */
328 0 : buffer = xstrdup (orig_string);
329 : }
330 883 : else if (!use_iconv)
331 : {
332 : /* For Latin-1 we can avoid the iconv overhead. */
333 13743 : for (s = string; *s; s++)
334 : {
335 12860 : length++;
336 12860 : if (*s & 0x80)
337 0 : length++;
338 : }
339 883 : buffer = xmalloc (length + 1);
340 13743 : for (p = (unsigned char *)buffer, s = string; *s; s++)
341 : {
342 12860 : if ( (*s & 0x80 ))
343 : {
344 0 : *p++ = 0xc0 | ((*s >> 6) & 3);
345 0 : *p++ = 0x80 | (*s & 0x3f);
346 : }
347 : else
348 12860 : *p++ = *s;
349 : }
350 883 : *p = 0;
351 : }
352 : else
353 : {
354 : /* Need to use iconv. */
355 : iconv_t cd;
356 : const char *inptr;
357 : char *outptr;
358 : size_t inbytes, outbytes;
359 :
360 0 : cd = iconv_open ("utf-8", active_charset_name);
361 0 : if (cd == (iconv_t)-1)
362 : {
363 0 : handle_iconv_error ("utf-8", active_charset_name, 1);
364 0 : return native_to_utf8 (string);
365 : }
366 :
367 0 : for (s=string; *s; s++ )
368 : {
369 0 : length++;
370 0 : if ((*s & 0x80))
371 0 : length += 5; /* We may need up to 6 bytes for the utf8 output. */
372 : }
373 0 : buffer = xmalloc (length + 1);
374 :
375 0 : inptr = string;
376 0 : inbytes = strlen (string);
377 0 : outptr = buffer;
378 0 : outbytes = length;
379 0 : if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
380 : &outptr, &outbytes) == (size_t)-1)
381 : {
382 : static int shown;
383 :
384 0 : if (!shown)
385 0 : log_info (_("conversion from '%s' to '%s' failed: %s\n"),
386 0 : active_charset_name, "utf-8", strerror (errno));
387 0 : shown = 1;
388 : /* We don't do any conversion at all but use the strings as is. */
389 0 : strcpy (buffer, string);
390 : }
391 : else /* Success. */
392 : {
393 0 : *outptr = 0;
394 : /* We could realloc the buffer now but I doubt that it makes
395 : much sense given that it will get freed anyway soon
396 : after. */
397 : }
398 0 : iconv_close (cd);
399 : }
400 883 : return buffer;
401 : }
402 :
403 :
404 :
/* Worker for utf8_to_native.  Convert the LENGTH bytes at STRING from
   UTF-8 to the native encoding and return the result as a newly
   allocated, NUL terminated string.  Unless DELIM is -1, control
   characters, DEL and the DELIM character are emitted as backslash
   escapes; a backslash itself is escaped only for a non-zero DELIM.
   Bytes which do not form a valid sequence are emitted as "\xNN".
   If WITH_ICONV is true, complete multi-byte sequences are first
   copied as they are and the entire buffer is then run through
   iconv; if that conversion fails the work is redone without iconv.
   The input is scanned twice: the first pass only computes the
   required size and the second pass writes the output.
   NOTE(review): a multi-byte sequence cut short by the end of the
   input is silently dropped - confirm that this is intended.  */
405 : static char *
406 880 : do_utf8_to_native (const char *string, size_t length, int delim,
407 : int with_iconv)
408 : {
409 : int nleft; /* Continuation bytes still expected. */
410 : int i;
411 : unsigned char encbuf[8]; /* Bytes of the sequence being decoded. */
412 : int encidx; /* Number of bytes stored in ENCBUF. */
413 : const unsigned char *s;
414 : size_t n; /* Number of output bytes counted in the current pass. */
415 880 : char *buffer = NULL;
416 880 : char *p = NULL; /* Write position; NULL during the first pass. */
417 880 : unsigned long val = 0; /* Value decoded from the current sequence. */
418 : size_t slen;
419 880 : int resync = 0; /* Set after an invalid byte was seen. */
420 :
421 : /* First pass (p==NULL): compute the size N of the output. */
422 : /* Second pass (p!=NULL): create string. */
423 : for (;;)
424 : {
425 62144 : for (slen = length, nleft = encidx = 0, n = 0,
426 1760 : s = (const unsigned char *)string;
427 : slen;
428 56864 : s++, slen--)
429 : {
430 56864 : if (resync)
431 : {
432 0 : if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
433 : {
434 : /* Still invalid: this byte can't start a character. */
435 0 : if (p)
436 : {
437 0 : sprintf (p, "\\x%02x", *s);
438 0 : p += 4;
439 : }
440 0 : n += 4;
441 0 : continue;
442 : }
443 0 : resync = 0;
444 : }
445 56864 : if (!nleft)
446 : {
447 56864 : if (!(*s & 0x80))
448 : {
449 : /* Plain ascii. */
450 56864 : if ( delim != -1
451 56864 : && (*s < 0x20 || *s == 0x7f || *s == delim
452 56864 : || (delim && *s == '\\')))
453 : {
                    /* Needs quoting: a backslash followed by either a
                       mnemonic letter or "xNN". */
454 0 : n++;
455 0 : if (p)
456 0 : *p++ = '\\';
457 0 : switch (*s)
458 : {
459 0 : case '\n': n++; if ( p ) *p++ = 'n'; break;
460 0 : case '\r': n++; if ( p ) *p++ = 'r'; break;
461 0 : case '\f': n++; if ( p ) *p++ = 'f'; break;
462 0 : case '\v': n++; if ( p ) *p++ = 'v'; break;
463 0 : case '\b': n++; if ( p ) *p++ = 'b'; break;
464 0 : case 0: n++; if ( p ) *p++ = '0'; break;
465 : default:
466 0 : n += 3;
467 0 : if (p)
468 : {
469 0 : sprintf (p, "x%02x", *s);
470 0 : p += 3;
471 : }
472 0 : break;
473 : }
474 0 : }
475 : else
476 : {
477 56864 : if (p)
478 28432 : *p++ = *s;
479 56864 : n++;
480 : }
481 : }
                /* Start bytes: remember the payload bits in VAL, the
                   number of continuation bytes in NLEFT and the raw
                   byte in ENCBUF. */
482 0 : else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
483 : {
484 0 : val = *s & 0x1f;
485 0 : nleft = 1;
486 0 : encidx = 0;
487 0 : encbuf[encidx++] = *s;
488 : }
489 0 : else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
490 : {
491 0 : val = *s & 0x0f;
492 0 : nleft = 2;
493 0 : encidx = 0;
494 0 : encbuf[encidx++] = *s;
495 : }
496 0 : else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
497 : {
498 0 : val = *s & 0x07;
499 0 : nleft = 3;
500 0 : encidx = 0;
501 0 : encbuf[encidx++] = *s;
502 : }
503 0 : else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
504 : {
505 0 : val = *s & 0x03;
506 0 : nleft = 4;
507 0 : encidx = 0;
508 0 : encbuf[encidx++] = *s;
509 : }
510 0 : else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
511 : {
512 0 : val = *s & 0x01;
513 0 : nleft = 5;
514 0 : encidx = 0;
515 0 : encbuf[encidx++] = *s;
516 : }
517 : else /* Invalid encoding: print as \xNN. */
518 : {
519 0 : if (p)
520 : {
521 0 : sprintf (p, "\\x%02x", *s);
522 0 : p += 4;
523 : }
524 0 : n += 4;
525 0 : resync = 1;
526 : }
527 : }
528 0 : else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
529 : {
                /* A continuation byte was expected.  Print the bytes
                   collected so far and this byte as \xNN. */
530 0 : if (p)
531 : {
532 0 : for (i = 0; i < encidx; i++)
533 : {
534 0 : sprintf (p, "\\x%02x", encbuf[i]);
535 0 : p += 4;
536 : }
537 0 : sprintf (p, "\\x%02x", *s);
538 0 : p += 4;
539 : }
540 0 : n += 4 + 4 * encidx;
541 0 : nleft = 0;
542 0 : encidx = 0;
543 0 : resync = 1;
544 : }
545 : else
546 : {
                /* Valid continuation byte: add its 6 payload bits. */
547 0 : encbuf[encidx++] = *s;
548 0 : val <<= 6;
549 0 : val |= *s & 0x3f;
550 0 : if (!--nleft) /* Ready: the sequence is complete. */
551 : {
552 0 : if (no_translation)
553 : {
                        /* Copy the sequence unchanged. */
554 0 : if (p)
555 : {
556 0 : for (i = 0; i < encidx; i++)
557 0 : *p++ = encbuf[i];
558 : }
559 0 : n += encidx;
560 0 : encidx = 0;
561 : }
562 0 : else if (with_iconv)
563 : {
564 : /* Our strategy for using iconv is a bit strange
565 : but it better keeps compatibility with
566 : previous versions in regard to how invalid
567 : encodings are displayed. What we do is to
568 : keep the utf-8 as is and have the real
569 : translation step then at the end. Yes, I
570 : know that this is ugly. However we are short
571 : of the 1.4 release and for this branch we
572 : should not mess too much around with iconv
573 : things. One reason for this is that we don't
574 : know enough about non-GNU iconv
575 : implementation and want to minimize the risk
576 : of breaking the code on too many platforms. */
577 0 : if ( p )
578 : {
579 0 : for (i=0; i < encidx; i++ )
580 0 : *p++ = encbuf[i];
581 : }
582 0 : n += encidx;
583 0 : encidx = 0;
584 : }
585 : else /* Latin-1 case. */
586 : {
587 0 : if (val >= 0x80 && val < 256)
588 : {
589 : /* We can simply print this character */
590 0 : n++;
591 0 : if (p)
592 0 : *p++ = val;
593 : }
594 : else
595 : {
596 : /* We do not have a translation: print the utf8 bytes as \xNN. */
597 0 : if (p)
598 : {
599 0 : for (i = 0; i < encidx; i++)
600 : {
601 0 : sprintf (p, "\\x%02x", encbuf[i]);
602 0 : p += 4;
603 : }
604 : }
605 0 : n += encidx * 4;
606 0 : encidx = 0;
607 : }
608 : }
609 : }
610 :
611 : }
612 : }
613 1760 : if (!buffer)
614 : {
615 : /* Allocate the buffer after the first pass and run the second pass. */
616 880 : buffer = p = xmalloc (n + 1);
617 : }
618 880 : else if (with_iconv)
619 : {
620 : /* Note: See above for comments. */
621 : iconv_t cd;
622 : const char *inptr;
623 : char *outbuf, *outptr;
624 : size_t inbytes, outbytes;
625 :
626 0 : *p = 0; /* Terminate the buffer. */
627 :
628 0 : cd = iconv_open (active_charset_name, "utf-8");
629 0 : if (cd == (iconv_t)-1)
630 : {
            /* The handler clears USE_ICONV; start over via the public
               entry point. */
631 0 : handle_iconv_error (active_charset_name, "utf-8", 1);
632 0 : xfree (buffer);
633 0 : return utf8_to_native (string, length, delim);
634 : }
635 :
636 : /* Allocate a new buffer large enough to hold all possible
637 : encodings. */
638 0 : n = p - buffer + 1;
639 0 : inbytes = n - 1;;
640 0 : inptr = buffer;
641 0 : outbytes = n * MB_LEN_MAX;
642 0 : if (outbytes / MB_LEN_MAX != n)
643 0 : BUG (); /* Actually an overflow. */
644 0 : outbuf = outptr = xmalloc (outbytes);
645 0 : if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
646 : &outptr, &outbytes) == (size_t)-1)
647 : {
648 : static int shown;
649 :
650 0 : if (!shown)
651 0 : log_info (_("conversion from '%s' to '%s' failed: %s\n"),
652 0 : "utf-8", active_charset_name, strerror (errno));
653 0 : shown = 1;
654 : /* That did not work out. Try again but without iconv. */
655 0 : xfree (buffer);
656 0 : buffer = NULL;
657 0 : xfree (outbuf);
658 0 : outbuf = do_utf8_to_native (string, length, delim, 0);
659 : }
660 : else /* Success. */
661 : {
662 0 : *outptr = 0; /* Make sure it is a string. */
663 : /* We could realloc the buffer now but I doubt that it
664 : makes much sense given that it will get freed
665 : anyway soon after. */
666 0 : xfree (buffer);
667 : }
668 0 : iconv_close (cd);
669 0 : return outbuf;
670 : }
671 : else /* Not using iconv. */
672 : {
673 880 : *p = 0; /* Make sure it is a string. */
674 880 : return buffer;
675 : }
676 880 : }
677 : }
678 :
679 : /* Convert string, which is in UTF-8 to native encoding. Replace
680 : illegal encodings by some "\xnn" and quote all control
681 : characters. A character with value DELIM will always be quoted, it
682 : must be a vanilla ASCII character. A DELIM value of -1 is special:
683 : it disables all quoting of control characters. This function
684 : terminates the process on memory shortage. */
685 : char *
686 880 : utf8_to_native (const char *string, size_t length, int delim)
687 : {
688 880 : return do_utf8_to_native (string, length, delim, use_iconv);
689 : }
690 :
691 :
692 :
693 :
694 : /* Wrapper function for iconv_open, required for W32 as we dlopen that
695 : library on that system. */
696 : jnlib_iconv_t
697 0 : jnlib_iconv_open (const char *tocode, const char *fromcode)
698 : {
699 0 : return (jnlib_iconv_t)iconv_open (tocode, fromcode);
700 : }
701 :
702 :
703 : /* Wrapper function for iconv, required for W32 as we dlopen that
704 : library on that system. */
705 : size_t
706 0 : jnlib_iconv (jnlib_iconv_t cd,
707 : const char **inbuf, size_t *inbytesleft,
708 : char **outbuf, size_t *outbytesleft)
709 : {
710 0 : return iconv ((iconv_t)cd, (ICONV_CONST char**)inbuf, inbytesleft,
711 : outbuf, outbytesleft);
712 : }
713 :
714 : /* Wrapper function for iconv_close, required for W32 as we dlopen that
715 : library on that system. */
716 : int
717 0 : jnlib_iconv_close (jnlib_iconv_t cd)
718 : {
719 0 : return iconv_close ((iconv_t)cd);
720 : }
721 :
722 :
723 : #ifdef HAVE_W32_SYSTEM
/* Return a malloced string encoded for CODEPAGE from the wide char
   input string STRING.  Caller must free this value.  Returns NULL
   and sets ERRNO on failure: EINVAL if the conversion failed;
   presumably xtrymalloc sets ERRNO itself if the allocation failed.
   Calling this function with STRING set to NULL is not defined.  */
static char *
wchar_to_cp (const wchar_t *string, unsigned int codepage)
{
  int n;
  char *result;

  /* First ask for the required size.  Because the terminating nul is
     included, a successful call returns at least 1; the function
     reports errors by returning 0.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, NULL, 0, NULL, NULL);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  result = xtrymalloc (n+1);
  if (!result)
    return NULL;

  /* Now do the actual conversion.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, result, n, NULL, NULL);
  if (n <= 0)
    {
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
754 :
755 :
/* Return a malloced wide char string from a CODEPAGE encoded input
   string STRING.  Caller must free this value.  Returns NULL and sets
   ERRNO on failure: EINVAL if the conversion failed and ENOMEM if the
   size computation overflowed; presumably xtrymalloc sets ERRNO
   itself if the allocation failed.  Calling this function with
   STRING set to NULL is not defined.  */
static wchar_t *
cp_to_wchar (const char *string, unsigned int codepage)
{
  int n;
  size_t nbytes;
  wchar_t *result;

  /* First ask for the required number of wide characters.  Because
     the terminating nul is included, a successful call returns at
     least 1; the function reports errors by returning 0.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, NULL, 0);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  /* Compute the allocation size and detect an overflow of the
     multiplication.  */
  nbytes = (size_t)(n+1) * sizeof(*result);
  if (nbytes / sizeof(*result) != (size_t)(n+1))
    {
      gpg_err_set_errno (ENOMEM);
      return NULL;
    }
  result = xtrymalloc (nbytes);
  if (!result)
    return NULL;

  /* Now do the actual conversion.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, result, n);
  if (n <= 0)
    {
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
793 :
794 :
795 : /* Return a malloced string encoded in the active code page from the
796 : * wide char input string STRING. Caller must free this value.
797 : * Returns NULL and sets ERRNO on failure. Calling this function with
798 : * STRING set to NULL is not defined. */
799 : char *
800 : wchar_to_native (const wchar_t *string)
801 : {
802 : return wchar_to_cp (string, CP_ACP);
803 : }
804 :
805 :
806 : /* Return a malloced wide char string from an UTF-8 encoded input
807 : * string STRING. Caller must free this value. Returns NULL and sets
808 : * ERRNO on failure. Calling this function with STRING set to NULL is
809 : * not defined. */
810 : wchar_t *
811 : native_to_wchar (const char *string)
812 : {
813 : return cp_to_wchar (string, CP_ACP);
814 : }
815 :
816 :
817 : /* Return a malloced string encoded in UTF-8 from the wide char input
818 : * string STRING. Caller must free this value. Returns NULL and sets
819 : * ERRNO on failure. Calling this function with STRING set to NULL is
820 : * not defined. */
821 : char *
822 : wchar_to_utf8 (const wchar_t *string)
823 : {
824 : return wchar_to_cp (string, CP_UTF8);
825 : }
826 :
827 :
828 : /* Return a malloced wide char string from an UTF-8 encoded input
829 : * string STRING. Caller must free this value. Returns NULL and sets
830 : * ERRNO on failure. Calling this function with STRING set to NULL is
831 : * not defined. */
832 : wchar_t *
833 : utf8_to_wchar (const char *string)
834 : {
835 : return cp_to_wchar (string, CP_UTF8);
836 : }
837 :
838 : #endif /*HAVE_W32_SYSTEM*/
|