Line data Source code
1 : /* data-identify.c - Try to identify the data
2 : Copyright (C) 2013 g10 Code GmbH
3 :
4 : This file is part of GPGME.
5 :
6 : GPGME is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU Lesser General Public License as
8 : published by the Free Software Foundation; either version 2.1 of
9 : the License, or (at your option) any later version.
10 :
11 : GPGME is distributed in the hope that it will be useful, but
12 : WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : Lesser General Public License for more details.
15 :
16 : You should have received a copy of the GNU Lesser General Public
17 : License along with this program; if not, see <http://www.gnu.org/licenses/>.
18 : */
19 :
20 : #if HAVE_CONFIG_H
21 : # include <config.h>
22 : #endif
23 :
24 : #include <stdlib.h>
25 : #include <string.h>
26 :
27 : #include "gpgme.h"
28 : #include "data.h"
29 : #include "util.h"
30 : #include "parsetlv.h"
31 :
32 : /* The size of the sample data we take for detection. */
33 : #define SAMPLE_SIZE 2048
34 :
35 :
36 :
37 : /* Note that DATA may be binary but a final nul is required so that
38 : string operations will find a terminator.
39 :
40 : Returns: GPGME_DATA_TYPE_xxxx */
41 : static gpgme_data_type_t
42 0 : basic_detection (const char *data, size_t datalen)
43 : {
44 : tlvinfo_t ti;
45 : const char *s;
46 : size_t n;
47 0 : int maybe_p12 = 0;
48 :
49 0 : if (datalen < 24) /* Object is probably too short for detection. */
50 0 : return GPGME_DATA_TYPE_UNKNOWN;
51 :
52 : /* This is a common example of a CMS object - it is obvious that we
53 : only need to read a few bytes to get to the OID:
54 : 30 82 0B 59 06 09 2A 86 48 86 F7 0D 01 07 02 A0 82 0B 4A 30 82 0B 46 02
55 : ----------- ++++++++++++++++++++++++++++++++
56 : SEQUENCE OID (signedData)
57 : (2 byte len)
58 :
59 : A PKCS#12 message is:
60 :
61 : 30 82 08 59 02 01 03 30 82 08 1F 06 09 2A 86 48 86 F7 0D 01 07 01 A0 82
62 : ----------- ++++++++ ----------- ++++++++++++++++++++++++++++++++
63 : SEQUENCE INTEGER SEQUENCE OID (data)
64 :
65 : A X.509 certificate is:
66 :
67 : 30 82 05 B8 30 82 04 A0 A0 03 02 01 02 02 07 15 46 A0 BF 30 07 39 30 0D
68 : ----------- +++++++++++ ----- ++++++++ --------------------------
69 : SEQUENCE SEQUENCE [0] INTEGER INTEGER SEQU
70 : (tbs) (version) (s/n) (Algo)
71 :
72 : Thus we need to read at least 22 bytes, we add 2 bytes to cope with
73 : length headers stored with 4 bytes.
74 : */
75 :
76 :
77 0 : s = data;
78 0 : n = datalen;
79 :
80 0 : if (parse_tlv (&s, &n, &ti))
81 0 : goto try_pgp; /* Not properly BER encoded. */
82 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
83 0 : && ti.is_cons))
84 : goto try_pgp; /* A CMS object always starts with a sequence. */
85 :
86 0 : if (parse_tlv (&s, &n, &ti))
87 0 : goto try_pgp; /* Not properly BER encoded. */
88 0 : if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
89 0 : && ti.is_cons && n >= ti.length)
90 : {
91 0 : if (parse_tlv (&s, &n, &ti))
92 0 : goto try_pgp;
93 0 : if (!(ti.cls == ASN1_CLASS_CONTEXT && ti.tag == 0
94 0 : && ti.is_cons && ti.length == 3 && n >= ti.length))
95 : goto try_pgp;
96 :
97 0 : if (parse_tlv (&s, &n, &ti))
98 0 : goto try_pgp;
99 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
100 0 : && !ti.is_cons && ti.length == 1 && n && (*s == 1 || *s == 2)))
101 : goto try_pgp;
102 0 : s++;
103 0 : n--;
104 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
105 0 : && !ti.is_cons))
106 : goto try_pgp;
107 : /* Because the now following S/N may be larger than the sample
108 : data we have, we stop parsing here and don't check for the
109 : algorithm ID. */
110 0 : return GPGME_DATA_TYPE_X509_CERT;
111 : }
112 0 : if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
113 0 : && !ti.is_cons && ti.length == 1 && n && *s == 3)
114 : {
115 0 : maybe_p12 = 1;
116 0 : s++;
117 0 : n--;
118 0 : if (parse_tlv (&s, &n, &ti))
119 0 : goto try_pgp;
120 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
121 0 : && ti.is_cons))
122 : goto try_pgp;
123 0 : if (parse_tlv (&s, &n, &ti))
124 0 : goto try_pgp;
125 : }
126 0 : if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_OBJECT_ID
127 0 : && !ti.is_cons && ti.length && n >= ti.length)
128 : {
129 0 : if (ti.length == 9)
130 : {
131 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x01", 9))
132 : {
133 : /* Data. */
134 0 : return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12
135 : /* */ : GPGME_DATA_TYPE_CMS_OTHER);
136 : }
137 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x02", 9))
138 : {
139 : /* Signed Data. */
140 0 : return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12
141 : /* */ : GPGME_DATA_TYPE_CMS_SIGNED);
142 : }
143 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x03", 9))
144 0 : return GPGME_DATA_TYPE_CMS_ENCRYPTED; /* Enveloped Data. */
145 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x05", 9))
146 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Digested Data. */
147 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x06", 9))
148 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Encrypted Data. */
149 : }
150 0 : else if (ti.length == 11)
151 : {
152 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x09\x10\x01\x02", 11))
153 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Auth Data. */
154 : }
155 : }
156 :
157 :
158 : try_pgp:
159 : /* Check whether this might be a non-armored PGP message. We need
160 : to do this before checking for armor lines, so that we don't get
161 : fooled by armored messages inside a signed binary PGP message. */
162 0 : if ((data[0] & 0x80))
163 : {
164 : /* That might be a binary PGP message. At least it is not plain
165 : ASCII. Of course this might be certain lead-in text of
166 : armored CMS messages. However, I am not sure whether this is
167 : at all defined and in any case it is uncommon. Thus we don't
168 : do any further plausibility checks but stupidly assume no CMS
169 : armored data will follow. */
170 0 : return GPGME_DATA_TYPE_UNKNOWN;
171 : }
172 :
173 : /* Now check whether there are armor lines. */
174 0 : for (s = data; s && *s; s = (*s=='\n')?(s+1):((s=strchr (s,'\n'))?(s+1):s))
175 : {
176 0 : if (!strncmp (s, "-----BEGIN ", 11))
177 : {
178 0 : if (!strncmp (s+11, "SIGNED ", 7))
179 0 : return GPGME_DATA_TYPE_CMS_SIGNED;
180 0 : if (!strncmp (s+11, "ENCRYPTED ", 10))
181 0 : return GPGME_DATA_TYPE_CMS_ENCRYPTED;
182 0 : if (!strncmp (s+11, "PGP ", 4))
183 : {
184 0 : if (!strncmp (s+15, "SIGNATURE", 9))
185 0 : return GPGME_DATA_TYPE_PGP_SIGNED;
186 0 : if (!strncmp (s+15, "SIGNED MESSAGE", 14))
187 0 : return GPGME_DATA_TYPE_PGP_SIGNED;
188 0 : if (!strncmp (s+15, "PUBLIC KEY BLOCK", 16))
189 0 : return GPGME_DATA_TYPE_PGP_KEY;
190 0 : if (!strncmp (s+15, "PRIVATE KEY BLOCK", 17))
191 0 : return GPGME_DATA_TYPE_PGP_KEY;
192 0 : if (!strncmp (s+15, "SECRET KEY BLOCK", 16))
193 0 : return GPGME_DATA_TYPE_PGP_KEY;
194 0 : if (!strncmp (s+15, "ARMORED FILE", 12))
195 0 : return GPGME_DATA_TYPE_UNKNOWN;
196 0 : return GPGME_DATA_TYPE_PGP_OTHER; /* PGP MESSAGE */
197 : }
198 0 : if (!strncmp (s+11, "CERTIFICATE", 11))
199 0 : return GPGME_DATA_TYPE_X509_CERT;
200 0 : if (!strncmp (s+11, "PKCS12", 6))
201 0 : return GPGME_DATA_TYPE_PKCS12;
202 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Not PGP, thus we assume CMS. */
203 : }
204 : }
205 :
206 0 : return GPGME_DATA_TYPE_UNKNOWN;
207 : }
208 :
209 :
210 : /* Try to detect the type of the data. Note that this function works
211 : only on seekable data objects. The function tries to reset the
212 : file pointer but there is no guarantee that it will work.
213 :
214 : FIXME: We may want to add internal buffering so that this function
215 : can be implemented for allmost all kind of data objects.
216 : */
217 : gpgme_data_type_t
218 0 : gpgme_data_identify (gpgme_data_t dh, int reserved)
219 : {
220 : gpgme_data_type_t result;
221 : char *sample;
222 : int n;
223 : gpgme_off_t off;
224 :
225 : /* Check whether we can seek the data object. */
226 0 : off = gpgme_data_seek (dh, 0, SEEK_CUR);
227 0 : if (off == (gpgme_off_t)(-1))
228 0 : return GPGME_DATA_TYPE_INVALID;
229 :
230 : /* Allocate a buffer and read the data. */
231 0 : sample = malloc (SAMPLE_SIZE);
232 0 : if (!sample)
233 0 : return GPGME_DATA_TYPE_INVALID; /* Ooops. */
234 0 : n = gpgme_data_read (dh, sample, SAMPLE_SIZE - 1);
235 0 : if (n < 0)
236 : {
237 0 : free (sample);
238 0 : return GPGME_DATA_TYPE_INVALID; /* Ooops. */
239 : }
240 0 : sample[n] = 0; /* (Required for our string functions.) */
241 :
242 0 : result = basic_detection (sample, n);
243 0 : free (sample);
244 0 : gpgme_data_seek (dh, off, SEEK_SET);
245 :
246 0 : return result;
247 : }
|