Line data Source code
1 : /* mbox-util.c - Mail address helper functions
2 : * Copyright (C) 1998-2010 Free Software Foundation, Inc.
3 : * Copyright (C) 1998-2015 Werner Koch
4 : *
5 : * This file is part of GnuPG.
6 : *
7 : * This file is free software; you can redistribute it and/or modify
8 : * it under the terms of the GNU Lesser General Public License as
9 : * published by the Free Software Foundation; either version 2.1 of
10 : * the License, or (at your option) any later version.
11 : *
12 : * This file is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : * GNU Lesser General Public License for more details.
16 : *
17 : * You should have received a copy of the GNU Lesser General Public License
18 : * along with this program; if not, see <https://www.gnu.org/licenses/>.
19 : */
20 :
21 : /* NB: This code has been taken from GnuPG. Please keep it in sync
22 : * with GnuPG. */
23 :
24 : #if HAVE_CONFIG_H
25 : # include <config.h>
26 : #endif
27 :
28 : #include <stdio.h>
29 : #include <stdlib.h>
30 : #include <string.h>
31 : #include <unistd.h>
32 : #include <errno.h>
33 :
34 : #include "mbox-util.h"
35 :
/* Convert every plain ASCII uppercase letter ('A'..'Z') in STRING to
   its lowercase form.  The conversion happens in place; bytes with
   bit 7 set are never touched.  Returns STRING itself.  */
static char *
ascii_strlwr (char *string)
{
  char *cursor = string;

  while (*cursor)
    {
      /* The mask test rejects everything outside the 7-bit range
         before the letter range is checked.  */
      if (!(*cursor & ~0x7f) && *cursor >= 'A' && *cursor <= 'Z')
        *cursor |= 0x20;
      cursor++;
    }

  return string;
}
48 :
49 :
/* Return how often the character C occurs in the nul-terminated
   STRING.  The terminating nul itself is never counted.  */
static int
string_count_chr (const char *string, int c)
{
  int hits = 0;

  while (*string)
    {
      if (*string == c)
        hits++;
      string++;
    }

  return hits;
}
60 :
/* Return how often the character C occurs within the first LENGTH
   bytes of BUFFER.  BUFFER need not be nul-terminated and embedded
   nul bytes do not stop the scan.  */
static int
mem_count_chr (const void *buffer, int c, size_t length)
{
  const char *bytes = buffer;
  int hits = 0;
  size_t idx;

  for (idx = 0; idx < length; idx++)
    {
      if (bytes[idx] == c)
        hits++;
    }

  return hits;
}
72 :
73 :
/* This is a case-sensitive version of our memistr.  I wonder why no
   standard function memstr exists but I better do not use the name
   memstr to avoid future conflicts.

   Search the first BUFLEN bytes of BUFFER for the nul-terminated
   string SUB and return a pointer to the first occurrence, or NULL
   if there is none.  BUFFER need not be nul-terminated and may
   contain nul bytes.  An empty SUB never matches.

   The comparison is bounded by the length of SUB, so SUB is never
   read beyond its terminator, even if the byte following a match in
   BUFFER happens to be a nul.  */
static const char *
my_memstr (const void *buffer, size_t buflen, const char *sub)
{
  const unsigned char *buf = buffer;
  size_t sublen = strlen (sub);
  size_t last;
  size_t pos;

  if (!sublen || sublen > buflen)
    return NULL;

  /* Last offset at which SUB still fits entirely into BUFFER.  */
  last = buflen - sublen;
  for (pos = 0; pos <= last; pos++)
    {
      /* Cheap first-byte test before the full comparison.  */
      if (buf[pos] == (unsigned char)*sub
          && !memcmp (buf + pos, sub, sublen))
        return (const char *)(buf + pos);
    }

  return NULL;
}
100 :
101 :
102 :
/* Return 1 if the nul-terminated STRING contains a byte with bit 7
   clear and a value of 0x20 (space) or less; return 0 otherwise.
   Bytes with bit 7 set are skipped.  */
static int
string_has_ctrl_or_space (const char *string)
{
  const char *p = string;

  while (*p)
    {
      int high_bit = *p & 0x80;

      if (!high_bit && *p <= 0x20)
        return 1;
      p++;
    }

  return 0;
}
111 :
112 :
/* Return 1 if two consecutive dots ("..") occur somewhere after the
   first '@' in STRING.  Return 0 if there is no such pair or if
   STRING has no '@' at all.  */
static int
has_dotdot_after_at (const char *string)
{
  const char *at = strchr (string, '@');

  if (at == NULL)
    return 0;  /* No at-sign.  */

  /* Only the part following the at-sign is inspected.  */
  return strstr (at + 1, "..") != NULL;
}
124 :
125 :
/* Check whether BUFFER has characters not valid in an RFC-822
   address.  LENGTH gives the length of BUFFER; the scan also stops
   at the first nul byte.  Returns 1 if an invalid character was
   found and 0 otherwise.

   Before the first '@' the letters, digits, '_', '-', '.' and the
   characters "!#$%&'*+/=?^`{|}~" are accepted; after an '@' only
   letters, digits, '_', '-' and '.' are accepted.  The '@' itself is
   always accepted here.

   To cope with OpenPGP, non-ASCII bytes (bit 7 set) are ignored so
   that for example umlauts are legal in an email address.  An
   OpenPGP user ID must be utf-8 encoded but there is no strict
   requirement for RFC-822.  Thus to avoid IDNA encoding the address
   is put verbatim as utf-8 into the user ID under the assumption
   that mail programs handle IDNA at a lower level and take OpenPGP
   user IDs as utf-8.  Note that no utf-8 encoding check can be done
   here because in keygen.c this function is called with the native
   encoding and native to utf-8 encoding is only done later.  */
static int
has_invalid_email_chars (const void *buffer, size_t length)
{
  static const char common_ok[] =
    "01234567890_-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  static const char local_extra[] = "!#$%&'*+/=?^`{|}~";
  const unsigned char *bytes = buffer;
  int in_domain = 0;
  size_t i;

  for (i = 0; i < length && bytes[i]; i++)
    {
      int ch = bytes[i];

      if ((ch & 0x80))
        continue;  /* We only care about ASCII.  */

      if (ch == '@')
        {
          in_domain = 1;
          continue;
        }

      /* Characters from the common set are fine on either side.  */
      if (strchr (common_ok, ch))
        continue;

      /* Anything else is tolerated only in the local part, and only
         if it is one of the extra punctuation characters.  */
      if (in_domain || !strchr (local_extra, ch))
        return 1;
    }

  return 0;
}
160 :
161 :
/* Same as _gpgme_is_valid_mailbox but operates on a buffer of
   NAMELEN bytes which need not be nul-terminated.  Returns 1 if the
   buffer passes all checks and 0 otherwise.  A NULL or empty buffer
   is never valid.  */
static int
is_valid_mailbox_mem (const void *name_arg, size_t namelen)
{
  const char *name = name_arg;

  if (!name || !namelen)
    return 0;

  if (has_invalid_email_chars (name, namelen))
    return 0;

  /* Exactly one at-sign, and neither at the start nor at the end.  */
  if (mem_count_chr (name, '@', namelen) != 1)
    return 0;
  if (name[0] == '@' || name[namelen-1] == '@')
    return 0;

  /* No trailing dot and no two consecutive dots anywhere.  */
  if (name[namelen-1] == '.')
    return 0;
  if (my_memstr (name, namelen, ".."))
    return 0;

  return 1;
}
178 :
179 :
/* Check whether the nul-terminated string NAME represents a valid
   mailbox according to RFC822.  Returns true if so; a NULL NAME is
   reported as not valid.  */
int
_gpgme_is_valid_mailbox (const char *name)
{
  if (!name)
    return 0;

  return is_valid_mailbox_mem (name, strlen (name));
}
187 :
188 :
/* Return the mailbox (local-part@domain) from a standard user id.
   All plain ASCII characters in the result are converted to
   lowercase.  Caller must free the result.

   If USERID contains a '<', the text between that '<' and the next
   '>' is taken as the mailbox and only some basic checks are applied
   to it.  Otherwise the entire USERID is returned if it passes
   _gpgme_is_valid_mailbox.

   Returns NULL if no valid mailbox was found, in which case ERRNO is
   set to EINVAL; this includes a NULL USERID.  NULL is also returned
   if we are out of memory.  */
char *
_gpgme_mailbox_from_userid (const char *userid)
{
  const char *s, *s_end;
  size_t len;
  char *result = NULL;

  if (!userid)
    {
      /* Nothing to parse; report it like any other invalid user id
         instead of handing NULL to strchr.  */
      errno = EINVAL;
      return NULL;
    }

  s = strchr (userid, '<');
  if (s)
    {
      /* Seems to be a standard user id.  */
      s++;
      s_end = strchr (s, '>');
      if (s_end && s_end > s)
        {
          len = s_end - s;
          result = malloc (len + 1);
          if (!result)
            return NULL; /* Ooops - out of core.  */
          /* The LEN bytes before S_END contain no nul, thus a plain
             memory copy followed by the terminator is sufficient.  */
          memcpy (result, s, len);
          result[len] = 0;
          /* Apply some basic checks on the address.  We do not use
             is_valid_mailbox because those checks are too strict.
             LEN is at least 1 here, so RESULT[LEN-1] is valid.  */
          if (string_count_chr (result, '@') != 1  /* Need exactly one '@'.  */
              || *result == '@'           /* local-part missing.  */
              || result[len-1] == '@'     /* domain missing.  */
              || result[len-1] == '.'     /* ends with a dot.  */
              || string_has_ctrl_or_space (result)
              || has_dotdot_after_at (result))
            {
              free (result);
              result = NULL;
              /* Set ERRNO only after free so that it is what the
                 caller sees.  */
              errno = EINVAL;
            }
        }
      else
        errno = EINVAL;  /* No closing '>' or nothing between <>.  */
    }
  else if (_gpgme_is_valid_mailbox (userid))
    {
      /* The entire user id is a mailbox.  Return that one.  Note that
         this fallback method has some restrictions on the valid
         syntax of the mailbox.  However, those who want weird
         addresses should know about it and use the regular <...>
         syntax.  */
      result = strdup (userid);
    }
  else
    errno = EINVAL;

  return result? ascii_strlwr (result): NULL;
}
245 :
246 :
247 : /* /\* Check whether UID is a valid standard user id of the form */
248 : /* "Heinrich Heine <heinrichh@duesseldorf.de>" */
249 : /* and return true if this is the case. *\/ */
250 : /* int */
251 : /* is_valid_user_id (const char *uid) */
252 : /* { */
253 : /* if (!uid || !*uid) */
254 : /* return 0; */
255 :
256 : /* return 1; */
257 : /* } */
258 :
259 :
260 : /*
261 : * Exported public API
262 : */
263 :
264 :
/* Return the mail address ("addr-spec" as per RFC-5322) from the
 * string UID, which is assumed to be a user id ("address" in
 * RFC-5322).  All plain ASCII characters (those with bit 7 cleared)
 * in the result are converted to lowercase.  Caller must free the
 * result using gpgme_free.  Returns NULL if no valid address was
 * found (in which case ERRNO is set to EINVAL) or for other errors.
 *
 * This is the exported name for _gpgme_mailbox_from_userid, which
 * does all the work.  */
char *
gpgme_addrspec_from_uid (const char *uid)
{
  char *addrspec = _gpgme_mailbox_from_userid (uid);

  return addrspec;
}
|