LCOV - coverage.info - common/utf8conv.c

LCOV - code coverage report

Current view:	top level - common - utf8conv.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	60	242	24.8 %
Date:	2016-11-29 15:00:56	Functions:	5	10	50.0 %

          Line data    Source code

       1             : /* utf8conv.c -  UTF8 character set conversion
       2             :  * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
       3             :  *               2008, 2010  Free Software Foundation, Inc.
       4             :  *
       5             :  * This file is part of GnuPG.
       6             :  *
       7             :  * GnuPG is free software; you can redistribute it and/or modify it
       8             :  * under the terms of either
       9             :  *
      10             :  *   - the GNU Lesser General Public License as published by the Free
      11             :  *     Software Foundation; either version 3 of the License, or (at
      12             :  *     your option) any later version.
      13             :  *
      14             :  * or
      15             :  *
      16             :  *   - the GNU General Public License as published by the Free
      17             :  *     Software Foundation; either version 2 of the License, or (at
      18             :  *     your option) any later version.
      19             :  *
      20             :  * or both in parallel, as here.
      21             :  *
      22             :  * GnuPG is distributed in the hope that it will be useful, but
      23             :  * WITHOUT ANY WARRANTY; without even the implied warranty of
      24             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      25             :  * General Public License for more details.
      26             :  *
      27             :  * You should have received copies of the GNU General Public License
      28             :  * and the GNU Lesser General Public License along with this program;
      29             :  * if not, see <https://www.gnu.org/licenses/>.
      30             :  */
      31             : 
      32             : #include <config.h>
      33             : #include <stdlib.h>
      34             : #include <string.h>
      35             : #include <stdarg.h>
      36             : #include <ctype.h>
      37             : #ifdef HAVE_LANGINFO_CODESET
      38             : #include <langinfo.h>
      39             : #endif
      40             : #include <errno.h>
      41             : 
      42             : #if HAVE_W32_SYSTEM
      43             : # /* Tell libgpg-error to provide the iconv macros.  */
      44             : # define GPGRT_ENABLE_W32_ICONV_MACROS 1
      45             : #elif HAVE_ANDROID_SYSTEM
      46             : # /* No iconv support.  */
      47             : #else
      48             : # include <iconv.h>
      49             : #endif
      50             : 
      51             : 
      52             : #include "util.h"
      53             : #include "common-defs.h"
      54             : #include "i18n.h"
      55             : #include "stringhelp.h"
      56             : #include "utf8conv.h"
      57             : 
      58             : #ifndef MB_LEN_MAX
      59             : #define MB_LEN_MAX 16
      60             : #endif
      61             : 
      62             : static const char *active_charset_name = "iso-8859-1"; /* Name of the native charset in use. */
      63             : static int no_translation;     /* True if strings are simply passed through (native is utf-8). */
      64             : static int use_iconv;          /* True if the iconv conversion functions are required. */
      65             : 
      66             : 
      67             : #ifdef HAVE_ANDROID_SYSTEM
      68             : /* Fake stuff to get things building.  */
      69             : typedef void *iconv_t;
      70             : #define ICONV_CONST
      71             : /* Stub for builds without iconv: opening a conversion always fails.  */
      72             : static iconv_t
      73             : iconv_open (const char *tocode, const char *fromcode)
      74             : {
      75             :   (void)tocode;
      76             :   (void)fromcode;
      77             :   return (iconv_t)(-1);
      78             : }
      79             : /* Stub for builds without iconv: touches no argument and returns 0.  */
      80             : static size_t
      81             : iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
      82             :        char **outbuf, size_t *outbytesleft)
      83             : {
      84             :   (void)cd;
      85             :   (void)inbuf;
      86             :   (void)inbytesleft;
      87             :   (void)outbuf;
      88             :   (void)outbytesleft;
      89             :   return (size_t)(0);
      90             : }
      91             : /* Stub for builds without iconv: nothing to release, always returns 0.  */
      92             : static int
      93             : iconv_close (iconv_t cd)
      94             : {
      95             :   (void)cd;
      96             :   return 0;
      97             : }
      98             : #endif /*HAVE_ANDROID_SYSTEM*/
      99             : /* Error handler for iconv_open failures.  TO and FROM name the
     100             :    requested conversion; errno is expected to still hold the error.
     101             :    Each diagnostic is shown only once so as not to clutter the output
     102             :    with repeats.  If USE_FALLBACK is true, switch to "utf-8" without iconv.  */
     103             : static void
     104           0 : handle_iconv_error (const char *to, const char *from, int use_fallback)
     105             : {
     106           0 :   if (errno == EINVAL)
     107             :     {
     108             :       static int shown1, shown2; /* One-shot flags: TO is "utf-8" / any other case.  */
     109             :       int x;
     110             : 
     111           0 :       if (to && !strcmp (to, "utf-8"))
     112             :         {
     113           0 :           x = shown1;
     114           0 :           shown1 = 1;
     115             :         }
     116             :       else
     117             :         {
     118           0 :           x = shown2;
     119           0 :           shown2 = 1;
     120             :         }
     121             : 
     122           0 :       if (!x)
     123           0 :         log_info (_("conversion from '%s' to '%s' not available\n"),
     124             :                   from, to);
     125             :     }
     126             :   else
     127             :     {
     128             :       static int shown; /* One-shot flag for all other errno values.  */
     129             : 
     130           0 :       if (!shown)
     131           0 :         log_info (_("iconv_open failed: %s\n"), strerror (errno));
     132           0 :       shown = 1;
     133             :     }
     134             : 
     135           0 :   if (use_fallback)
     136             :     {
     137             :       /* To avoid further error messages we fall back to UTF-8 for the
     138             :          native encoding.  Nowadays this seems to be the best bet in
     139             :          case of errors from iconv or nl_langinfo.  */
     140           0 :       active_charset_name = "utf-8";
     141           0 :       no_translation = 0; /* NOTE(review): stays 0 although the name is now "utf-8" - confirm intent.  */
     142           0 :       use_iconv = 0;
     143             :     }
     144           0 : }
     145             : /* Set the native charset to NEWSET; if NEWSET is NULL it is determined
     146             :    from the platform (code page, nl_langinfo or locale environment).
     147             :    Returns 0 on success or -1, with the state unchanged, if iconv_open fails.  */
     148             : int
     149        1718 : set_native_charset (const char *newset)
     150             : {
     151             :   const char *full_newset;
     152             : 
     153        1718 :   if (!newset)
     154             :     {
     155             : #ifdef HAVE_ANDROID_SYSTEM
     156             :       newset = "utf-8";
     157             : #elif defined HAVE_W32_SYSTEM
     158             :       static char codepage[30];
     159             :       unsigned int cpno;
     160             :       const char *aliases;
     161             : 
     162             :       /* We are a console program thus we need to use the
     163             :          GetConsoleOutputCP function and not the GetACP which
     164             :          would give the codepage for a GUI program.  Note this is not
     165             :          a bulletproof detection because GetConsoleCP might return a
     166             :          different one for console input.  Not sure how to cope with
     167             :          that.  If the console code page is not known we fall back to
     168             :          the system code page.  */
     169             : #ifndef HAVE_W32CE_SYSTEM
     170             :       cpno = GetConsoleOutputCP ();
     171             :       if (!cpno)
     172             : #endif
     173             :         cpno = GetACP ();
     174             :       sprintf (codepage, "CP%u", cpno );
     175             :       /* Resolve alias.  We use a long string and not the usual
     176             :          array to optimize if the code is taken to a DSO.  Taken from
     177             :          libiconv 1.9.2. */
     178             :       newset = codepage;
     179             :       for (aliases = ("CP936"   "\0" "GBK" "\0"
     180             :                       "CP1361"  "\0" "JOHAB" "\0"
     181             :                       "CP20127" "\0" "ASCII" "\0"
     182             :                       "CP20866" "\0" "KOI8-R" "\0"
     183             :                       "CP21866" "\0" "KOI8-RU" "\0"
     184             :                       "CP28591" "\0" "ISO-8859-1" "\0"
     185             :                       "CP28592" "\0" "ISO-8859-2" "\0"
     186             :                       "CP28593" "\0" "ISO-8859-3" "\0"
     187             :                       "CP28594" "\0" "ISO-8859-4" "\0"
     188             :                       "CP28595" "\0" "ISO-8859-5" "\0"
     189             :                       "CP28596" "\0" "ISO-8859-6" "\0"
     190             :                       "CP28597" "\0" "ISO-8859-7" "\0"
     191             :                       "CP28598" "\0" "ISO-8859-8" "\0"
     192             :                       "CP28599" "\0" "ISO-8859-9" "\0"
     193             :                       "CP28605" "\0" "ISO-8859-15" "\0"
     194             :                       "CP65001" "\0" "UTF-8" "\0");
     195             :            *aliases;
     196             :            aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1) /* Skip one name/value pair.  */
     197             :         {
     198             :           if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
     199             :             {
     200             :               newset = aliases + strlen (aliases) + 1; /* The value follows the matched name.  */
     201             :               break;
     202             :             }
     203             :         }
     204             : 
     205             : #else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
     206             : 
     207             : #ifdef HAVE_LANGINFO_CODESET
     208        1718 :       newset = nl_langinfo (CODESET);
     209             : #else /*!HAVE_LANGINFO_CODESET*/
     210             :       /* Try to get the used charset from environment variables.  */
     211             :       static char codepage[30];
     212             :       const char *lc, *dot, *mod;
     213             : 
     214             :       strcpy (codepage, "iso-8859-1");
     215             :       lc = getenv ("LC_ALL");
     216             :       if (!lc || !*lc)
     217             :         {
     218             :           lc = getenv ("LC_CTYPE");
     219             :           if (!lc || !*lc)
     220             :             lc = getenv ("LANG");
     221             :         }
     222             :       if (lc && *lc)
     223             :         {
     224             :           dot = strchr (lc, '.'); /* The charset is the part between '.' and an optional '@'.  */
     225             :           if (dot)
     226             :             {
     227             :               mod = strchr (++dot, '@');
     228             :               if (!mod)
     229             :                 mod = dot + strlen (dot);
     230             :               if (mod - dot < sizeof codepage && dot != mod)
     231             :                 {
     232             :                   memcpy (codepage, dot, mod - dot);
     233             :                   codepage [mod - dot] = 0;
     234             :                 }
     235             :             }
     236             :         }
     237             :       newset = codepage;
     238             : #endif /*!HAVE_LANGINFO_CODESET*/
     239             : #endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
     240             :     }
     241             : 
     242        1718 :   full_newset = newset;
     243        1718 :   if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3)) /* Strip "iso", "iso-" or "iso_" for the checks below.  */
     244             :     {
     245           0 :       newset += 3;
     246           0 :       if (*newset == '-' || *newset == '_')
     247           0 :         newset++;
     248             :     }
     249             : 
     250             :   /* Note that we silently assume that plain ASCII is actually meant
     251             :      as Latin-1.  This makes sense because many Unix systems don't have
     252             :      their locale set up properly and thus would get annoying error
     253             :      messages and we have to handle all the "bug" reports. Latin-1 has
     254             :      traditionally been the character set used for 8 bit characters on
     255             :      Unix systems. */
     256        1718 :   if ( !*newset
     257        1718 :        || !ascii_strcasecmp (newset, "8859-1" )
     258        1718 :        || !ascii_strcasecmp (newset, "646" )
     259        1718 :        || !ascii_strcasecmp (newset, "ASCII" )
     260        1718 :        || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
     261             :        )
     262             :     {
     263        1717 :       active_charset_name = "iso-8859-1";
     264        1717 :       no_translation = 0;
     265        1717 :       use_iconv = 0;
     266             :     }
     267           1 :   else if ( !ascii_strcasecmp (newset, "utf8" )
     268           1 :             || !ascii_strcasecmp(newset, "utf-8") )
     269             :     {
     270           1 :       active_charset_name = "utf-8";
     271           1 :       no_translation = 1;
     272           1 :       use_iconv = 0;
     273             :     }
     274             :   else
     275             :     {
     276             :       iconv_t cd;
     277             : 
     278           0 :       cd = iconv_open (full_newset, "utf-8"); /* Probe both directions before changing any state.  */
     279           0 :       if (cd == (iconv_t)-1)
     280             :         {
     281           0 :           handle_iconv_error (full_newset, "utf-8", 0);
     282           0 :           return -1;
     283             :         }
     284           0 :       iconv_close (cd);
     285           0 :       cd = iconv_open ("utf-8", full_newset);
     286           0 :       if (cd == (iconv_t)-1)
     287             :         {
     288           0 :           handle_iconv_error ("utf-8", full_newset, 0);
     289           0 :           return -1;
     290             :         }
     291           0 :       iconv_close (cd);
     292           0 :       active_charset_name = full_newset; /* NOTE(review): keeps the pointer (caller's or nl_langinfo's), not a copy - confirm lifetime.  */
     293           0 :       no_translation = 0;
     294           0 :       use_iconv = 1;
     295             :     }
     296        1718 :   return 0;
     297             : }
     298             : /* Return the name of the currently active native charset.  */
     299             : const char *
     300           0 : get_native_charset ()
     301             : {
     302           0 :   return active_charset_name;
     303             : }
     304             : /* Return true if the native charset was set to utf-8.  This reports
     305             :    the no_translation flag, which the error fallback to "utf-8" leaves at 0.  */
     306             : int
     307          20 : is_native_utf8 (void)
     308             : {
     309          20 :   return no_translation;
     310             : }
     311             : 
     312             : 
     313             : /* Convert ORIG_STRING, which is in native encoding, to UTF-8 and return a
     314             :    newly allocated string.  If the iconv conversion fails the input is copied
     315             :    unchanged.  This function terminates the process on memory shortage.  */
     316             : char *
     317         883 : native_to_utf8 (const char *orig_string)
     318             : {
     319         883 :   const unsigned char *string = (const unsigned char *)orig_string;
     320             :   const unsigned char *s;
     321             :   char *buffer;
     322             :   unsigned char *p;
     323         883 :   size_t length = 0;
     324             : 
     325         883 :   if (no_translation)
     326             :     {
     327             :       /* Already utf-8 encoded. */
     328           0 :       buffer = xstrdup (orig_string);
     329             :     }
     330         883 :   else if (!use_iconv)
     331             :     {
     332             :       /* For Latin-1 we can avoid the iconv overhead. */
     333       13743 :       for (s = string; *s; s++) /* First pass: compute the output length.  */
     334             :         {
     335       12860 :           length++;
     336       12860 :           if (*s & 0x80)
     337           0 :             length++;
     338             :         }
     339         883 :       buffer = xmalloc (length + 1);
     340       13743 :       for (p = (unsigned char *)buffer, s = string; *s; s++)
     341             :         {
     342       12860 :           if ( (*s & 0x80 ))
     343             :             {
     344           0 :               *p++ = 0xc0 | ((*s >> 6) & 3); /* Bytes with the high bit set become two bytes.  */
     345           0 :               *p++ = 0x80 | (*s & 0x3f);
     346             :             }
     347             :           else
     348       12860 :             *p++ = *s;
     349             :         }
     350         883 :       *p = 0;
     351             :     }
     352             :   else
     353             :     {
     354             :       /* Need to use iconv.  */
     355             :       iconv_t cd;
     356             :       const char *inptr;
     357             :       char *outptr;
     358             :       size_t inbytes, outbytes;
     359             : 
     360           0 :       cd = iconv_open ("utf-8", active_charset_name);
     361           0 :       if (cd == (iconv_t)-1)
     362             :         {
     363           0 :           handle_iconv_error ("utf-8", active_charset_name, 1);
     364           0 :           return native_to_utf8 (string); /* Retry: the fallback cleared use_iconv, so the Latin-1 branch is taken.  */
     365             :         }
     366             : 
     367           0 :       for (s=string; *s; s++ )
     368             :         {
     369           0 :           length++;
     370           0 :           if ((*s & 0x80))
     371           0 :             length += 5; /* We may need up to 6 bytes for the utf8 output. */
     372             :         }
     373           0 :       buffer = xmalloc (length + 1);
     374             : 
     375           0 :       inptr = string; /* NOTE(review): string is unsigned char *; orig_string has the matching type.  */
     376           0 :       inbytes = strlen (string);
     377           0 :       outptr = buffer;
     378           0 :       outbytes = length;
     379           0 :       if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
     380             :                   &outptr, &outbytes) == (size_t)-1)
     381             :         {
     382             :           static int shown; /* Print the failure diagnostic only once.  */
     383             : 
     384           0 :           if (!shown)
     385           0 :             log_info (_("conversion from '%s' to '%s' failed: %s\n"),
     386           0 :                       active_charset_name, "utf-8", strerror (errno));
     387           0 :           shown = 1;
     388             :           /* We don't do any conversion at all but use the strings as is. */
     389           0 :           strcpy (buffer, string);
     390             :         }
     391             :       else /* Success.  */
     392             :         {
     393           0 :           *outptr = 0;
     394             :           /* We could realloc the buffer now but I doubt that it makes
     395             :              much sense given that it will get freed anyway soon
     396             :              after.  */
     397             :         }
     398           0 :       iconv_close (cd);
     399             :     }
     400         883 :   return buffer;
     401             : }
     402             : 
     403             : 
     404             : 
     405             : static char *
     406         880 : do_utf8_to_native (const char *string, size_t length, int delim,
     407             :                    int with_iconv)
     408             : {
     409             :   int nleft;
     410             :   int i;
     411             :   unsigned char encbuf[8];
     412             :   int encidx;
     413             :   const unsigned char *s;
     414             :   size_t n;
     415         880 :   char *buffer = NULL;
     416         880 :   char *p = NULL;
     417         880 :   unsigned long val = 0;
     418             :   size_t slen;
     419         880 :   int resync = 0;
     420             : 
     421             :   /* First pass (p==NULL): count the extended utf-8 characters.  */
     422             :   /* Second pass (p!=NULL): create string.  */
     423             :   for (;;)
     424             :     {
     425       62144 :       for (slen = length, nleft = encidx = 0, n = 0,
     426        1760 :              s = (const unsigned char *)string;
     427             :            slen;
     428       56864 :            s++, slen--)
     429             :         {
     430       56864 :           if (resync)
     431             :             {
     432           0 :               if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
     433             :                 {
     434             :                   /* Still invalid. */
     435           0 :                   if (p)
     436             :                     {
     437           0 :                       sprintf (p, "\\x%02x", *s);
     438           0 :                       p += 4;
     439             :                     }
     440           0 :                   n += 4;
     441           0 :                   continue;
     442             :                 }
     443           0 :               resync = 0;
     444             :             }
     445       56864 :           if (!nleft)
     446             :             {
     447       56864 :               if (!(*s & 0x80))
     448             :                 {
     449             :                   /* Plain ascii. */
     450       56864 :                   if ( delim != -1
     451       56864 :                        && (*s < 0x20 || *s == 0x7f || *s == delim
     452       56864 :                            || (delim && *s == '\\')))
     453             :                     {
     454           0 :                       n++;
     455           0 :                       if (p)
     456           0 :                         *p++ = '\\';
     457           0 :                       switch (*s)
     458             :                         {
     459           0 :                         case '\n': n++; if ( p ) *p++ = 'n'; break;
     460           0 :                         case '\r': n++; if ( p ) *p++ = 'r'; break;
     461           0 :                         case '\f': n++; if ( p ) *p++ = 'f'; break;
     462           0 :                         case '\v': n++; if ( p ) *p++ = 'v'; break;
     463           0 :                         case '\b': n++; if ( p ) *p++ = 'b'; break;
     464           0 :                         case    0: n++; if ( p ) *p++ = '0'; break;
     465             :                         default:
     466           0 :                           n += 3;
     467           0 :                           if (p)
     468             :                             {
     469           0 :                               sprintf (p, "x%02x", *s);
     470           0 :                               p += 3;
     471             :                             }
     472           0 :                           break;
     473             :                         }
     474           0 :                     }
     475             :                   else
     476             :                     {
     477       56864 :                       if (p)
     478       28432 :                         *p++ = *s;
     479       56864 :                       n++;
     480             :                     }
     481             :                 }
     482           0 :               else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
     483             :                 {
     484           0 :                   val = *s & 0x1f;
     485           0 :                   nleft = 1;
     486           0 :                   encidx = 0;
     487           0 :                   encbuf[encidx++] = *s;
     488             :                 }
     489           0 :               else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
     490             :                 {
     491           0 :                   val = *s & 0x0f;
     492           0 :                   nleft = 2;
     493           0 :                   encidx = 0;
     494           0 :                   encbuf[encidx++] = *s;
     495             :                 }
     496           0 :               else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
     497             :                 {
     498           0 :                   val = *s & 0x07;
     499           0 :                   nleft = 3;
     500           0 :                   encidx = 0;
     501           0 :                   encbuf[encidx++] = *s;
     502             :                 }
     503           0 :               else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
     504             :                 {
     505           0 :                   val = *s & 0x03;
     506           0 :                   nleft = 4;
     507           0 :                   encidx = 0;
     508           0 :                   encbuf[encidx++] = *s;
     509             :                 }
     510           0 :               else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
     511             :                 {
     512           0 :                   val = *s & 0x01;
     513           0 :                   nleft = 5;
     514           0 :                   encidx = 0;
     515           0 :                   encbuf[encidx++] = *s;
     516             :                 }
     517             :               else /* Invalid encoding: print as \xNN. */
     518             :                 {
     519           0 :                   if (p)
     520             :                     {
     521           0 :                       sprintf (p, "\\x%02x", *s);
     522           0 :                       p += 4;
     523             :                     }
     524           0 :                   n += 4;
     525           0 :                   resync = 1;
     526             :                 }
     527             :             }
     528           0 :           else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
     529             :             {
     530           0 :               if (p)
     531             :                 {
     532           0 :                   for (i = 0; i < encidx; i++)
     533             :                     {
     534           0 :                       sprintf (p, "\\x%02x", encbuf[i]);
     535           0 :                       p += 4;
     536             :                     }
     537           0 :                   sprintf (p, "\\x%02x", *s);
     538           0 :                   p += 4;
     539             :                 }
     540           0 :               n += 4 + 4 * encidx;
     541           0 :               nleft = 0;
     542           0 :               encidx = 0;
     543           0 :               resync = 1;
     544             :             }
     545             :           else
     546             :             {
     547           0 :               encbuf[encidx++] = *s;
     548           0 :               val <<= 6;
     549           0 :               val |= *s & 0x3f;
     550           0 :               if (!--nleft)  /* Ready. */
     551             :                 {
     552           0 :                   if (no_translation)
     553             :                     {
     554           0 :                       if (p)
     555             :                         {
     556           0 :                           for (i = 0; i < encidx; i++)
     557           0 :                             *p++ = encbuf[i];
     558             :                         }
     559           0 :                       n += encidx;
     560           0 :                       encidx = 0;
     561             :                     }
     562           0 :                   else if (with_iconv)
     563             :                     {
     564             :                       /* Our strategy for using iconv is a bit strange
     565             :                          but it better keeps compatibility with
     566             :                          previous versions in regard to how invalid
     567             :                          encodings are displayed.  What we do is to
     568             :                          keep the utf-8 as is and have the real
     569             :                          translation step then at the end.  Yes, I
     570             :                          know that this is ugly.  However we are short
     571             :                          of the 1.4 release and for this branch we
     572             :                          should not mess too much around with iconv
     573             :                          things.  One reason for this is that we don't
     574             :                          know enough about non-GNU iconv
     575             :                          implementation and want to minimize the risk
     576             :                          of breaking the code on too many platforms.  */
     577           0 :                         if ( p )
     578             :                           {
     579           0 :                             for (i=0; i < encidx; i++ )
     580           0 :                               *p++ = encbuf[i];
     581             :                           }
     582           0 :                         n += encidx;
     583           0 :                         encidx = 0;
     584             :                     }
     585             :                   else  /* Latin-1 case. */
     586             :                     {
     587           0 :                       if (val >= 0x80 && val < 256)
     588             :                         {
     589             :                           /* We can simply print this character */
     590           0 :                           n++;
     591           0 :                           if (p)
     592           0 :                             *p++ = val;
     593             :                         }
     594             :                       else
     595             :                         {
     596             :                           /* We do not have a translation: print utf8. */
     597           0 :                           if (p)
     598             :                             {
     599           0 :                               for (i = 0; i < encidx; i++)
     600             :                                 {
     601           0 :                                   sprintf (p, "\\x%02x", encbuf[i]);
     602           0 :                                   p += 4;
     603             :                                 }
     604             :                             }
     605           0 :                           n += encidx * 4;
     606           0 :                           encidx = 0;
     607             :                         }
     608             :                     }
     609             :                 }
     610             : 
     611             :             }
     612             :         }
     613        1760 :       if (!buffer)
     614             :         {
     615             :           /* Allocate the buffer after the first pass. */
     616         880 :           buffer = p = xmalloc (n + 1);
     617             :         }
     618         880 :       else if (with_iconv)
     619             :         {
     620             :           /* Note: See above for comments.  */
     621             :           iconv_t cd;
     622             :           const char *inptr;
     623             :           char *outbuf, *outptr;
     624             :           size_t inbytes, outbytes;
     625             : 
     626           0 :           *p = 0;  /* Terminate the buffer. */
     627             : 
     628           0 :           cd = iconv_open (active_charset_name, "utf-8");
     629           0 :           if (cd == (iconv_t)-1)
     630             :             {
     631           0 :               handle_iconv_error (active_charset_name, "utf-8", 1);
     632           0 :               xfree (buffer);
     633           0 :               return utf8_to_native (string, length, delim);
     634             :             }
     635             : 
     636             :           /* Allocate a new buffer large enough to hold all possible
     637             :              encodings. */
     638           0 :           n = p - buffer + 1;
     639           0 :           inbytes = n - 1;;
     640           0 :           inptr = buffer;
     641           0 :           outbytes = n * MB_LEN_MAX;
     642           0 :           if (outbytes / MB_LEN_MAX != n)
     643           0 :             BUG (); /* Actually an overflow. */
     644           0 :           outbuf = outptr = xmalloc (outbytes);
     645           0 :           if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
     646             :                       &outptr, &outbytes) == (size_t)-1)
     647             :             {
     648             :               static int shown;
     649             : 
     650           0 :               if (!shown)
     651           0 :                 log_info (_("conversion from '%s' to '%s' failed: %s\n"),
     652           0 :                           "utf-8", active_charset_name, strerror (errno));
     653           0 :               shown = 1;
     654             :               /* Didn't work out.  Try again but without iconv.  */
     655           0 :               xfree (buffer);
     656           0 :               buffer = NULL;
     657           0 :               xfree (outbuf);
     658           0 :               outbuf = do_utf8_to_native (string, length, delim, 0);
     659             :             }
     660             :             else /* Success.  */
     661             :               {
     662           0 :                 *outptr = 0; /* Make sure it is a string. */
     663             :                 /* We could realloc the buffer now but I doubt that it
     664             :                    makes much sense given that it will get freed
     665             :                    anyway soon after.  */
     666           0 :                 xfree (buffer);
     667             :               }
     668           0 :           iconv_close (cd);
     669           0 :           return outbuf;
     670             :         }
     671             :       else /* Not using iconv. */
     672             :         {
     673         880 :           *p = 0; /* Make sure it is a string. */
     674         880 :           return buffer;
     675             :         }
     676         880 :     }
     677             : }
     678             : 
     679             : /* Convert string, which is in UTF-8 to native encoding.  Replace
     680             :    illegal encodings by some "\xnn" and quote all control
     681             :    characters. A character with value DELIM will always be quoted, it
     682             :    must be a vanilla ASCII character.  A DELIM value of -1 is special:
     683             :    it disables all quoting of control characters.  This function
     684             :    terminates the process on memory shortage.  */
     685             : char *
     686         880 : utf8_to_native (const char *string, size_t length, int delim)
     687             : {
     688         880 :   return do_utf8_to_native (string, length, delim, use_iconv);
     689             : }
     690             : 
     691             : 
     692             : 
     693             : 
     694             : /* Wrapper function for iconv_open, required for W32 as we dlopen that
     695             :    library on that system.  */
     696             : jnlib_iconv_t
     697           0 : jnlib_iconv_open (const char *tocode, const char *fromcode)
     698             : {
     699           0 :   return (jnlib_iconv_t)iconv_open (tocode, fromcode);
     700             : }
     701             : 
     702             : 
     703             : /* Wrapper function for iconv, required for W32 as we dlopen that
     704             :    library on that system.  */
     705             : size_t
     706           0 : jnlib_iconv (jnlib_iconv_t cd,
     707             :              const char **inbuf, size_t *inbytesleft,
     708             :              char **outbuf, size_t *outbytesleft)
     709             : {
     710           0 :   return iconv ((iconv_t)cd, (ICONV_CONST char**)inbuf, inbytesleft,
     711             :                 outbuf, outbytesleft);
     712             : }
     713             : 
     714             : /* Wrapper function for iconv_close, required for W32 as we dlopen that
     715             :    library on that system.  */
     716             : int
     717           0 : jnlib_iconv_close (jnlib_iconv_t cd)
     718             : {
     719           0 :   return iconv_close ((iconv_t)cd);
     720             : }
     721             : 
     722             : 
     723             : #ifdef HAVE_W32_SYSTEM
     724             : /* Return a malloced string encoded for CODEPAGE from the wide char input
     725             :    string STRING.  Caller must free this value.  Returns NULL and sets
     726             :    ERRNO on failure.  Calling this function with STRING set to NULL is
     727             :    not defined.  */
     728             : static char *
     729             : wchar_to_cp (const wchar_t *string, unsigned int codepage)
     730             : {
     731             :   int n;
     732             :   char *result;
     733             : 
     734             :   n = WideCharToMultiByte (codepage, 0, string, -1, NULL, 0, NULL, NULL);
     735             :   if (n < 0)
     736             :     {
     737             :       gpg_err_set_errno (EINVAL);
     738             :       return NULL;
     739             :     }
     740             : 
     741             :   result = xtrymalloc (n+1);
     742             :   if (!result)
     743             :     return NULL;
     744             : 
     745             :   n = WideCharToMultiByte (codepage, 0, string, -1, result, n, NULL, NULL);
     746             :   if (n < 0)
     747             :     {
     748             :       xfree (result);
     749             :       gpg_err_set_errno (EINVAL);
     750             :       result = NULL;
     751             :     }
     752             :   return result;
     753             : }
     754             : 
     755             : 
     756             : /* Return a malloced wide char string from a CODEPAGE encoded input
     757             :    string STRING.  Caller must free this value.  Returns NULL and sets
     758             :    ERRNO on failure.  Calling this function with STRING set to NULL is
     759             :    not defined.  */
     760             : static wchar_t *
     761             : cp_to_wchar (const char *string, unsigned int codepage)
     762             : {
     763             :   int n;
     764             :   size_t nbytes;
     765             :   wchar_t *result;
     766             : 
     767             :   n = MultiByteToWideChar (codepage, 0, string, -1, NULL, 0);
     768             :   if (n < 0)
     769             :     {
     770             :       gpg_err_set_errno (EINVAL);
     771             :       return NULL;
     772             :     }
     773             : 
     774             :   nbytes = (size_t)(n+1) * sizeof(*result);
     775             :   if (nbytes / sizeof(*result) != (n+1))
     776             :     {
     777             :       gpg_err_set_errno (ENOMEM);
     778             :       return NULL;
     779             :     }
     780             :   result = xtrymalloc (nbytes);
     781             :   if (!result)
     782             :     return NULL;
     783             : 
     784             :   n = MultiByteToWideChar (codepage, 0, string, -1, result, n);
     785             :   if (n < 0)
     786             :     {
     787             :       xfree (result);
     788             :       gpg_err_set_errno (EINVAL);
     789             :       result = NULL;
     790             :     }
     791             :   return result;
     792             : }
     793             : 
     794             : 
     795             : /* Return a malloced string encoded in the active code page from the
     796             :  * wide char input string STRING.  Caller must free this value.
     797             :  * Returns NULL and sets ERRNO on failure.  Calling this function with
     798             :  * STRING set to NULL is not defined.  */
     799             : char *
     800             : wchar_to_native (const wchar_t *string)
     801             : {
     802             :   return wchar_to_cp (string, CP_ACP);
     803             : }
     804             : 
     805             : 
     806             : /* Return a malloced wide char string from an UTF-8 encoded input
     807             :  * string STRING.  Caller must free this value.  Returns NULL and sets
     808             :  * ERRNO on failure.  Calling this function with STRING set to NULL is
     809             :  * not defined.  */
     810             : wchar_t *
     811             : native_to_wchar (const char *string)
     812             : {
     813             :   return cp_to_wchar (string, CP_ACP);
     814             : }
     815             : 
     816             : 
     817             : /* Return a malloced string encoded in UTF-8 from the wide char input
     818             :  * string STRING.  Caller must free this value.  Returns NULL and sets
     819             :  * ERRNO on failure.  Calling this function with STRING set to NULL is
     820             :  * not defined.  */
     821             : char *
     822             : wchar_to_utf8 (const wchar_t *string)
     823             : {
     824             :   return wchar_to_cp (string, CP_UTF8);
     825             : }
     826             : 
     827             : 
     828             : /* Return a malloced wide char string from an UTF-8 encoded input
     829             :  * string STRING.  Caller must free this value.  Returns NULL and sets
     830             :  * ERRNO on failure.  Calling this function with STRING set to NULL is
     831             :  * not defined.  */
     832             : wchar_t *
     833             : utf8_to_wchar (const char *string)
     834             : {
     835             :   return cp_to_wchar (string, CP_UTF8);
     836             : }
     837             : 
     838             : #endif /*HAVE_W32_SYSTEM*/

Generated by: LCOV version 1.11