Line data Source code
1 : /* rfc822parse.c - Simple mail and MIME parser
2 : * Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
3 : * Copyright (C) 2003, 2004 g10 Code GmbH
4 : *
5 : * This program is free software; you can redistribute it and/or
6 : * modify it under the terms of the GNU Lesser General Public License
7 : * as published by the Free Software Foundation; either version 3 of
8 : * the License, or (at your option) any later version.
9 : *
10 : * This program is distributed in the hope that it will be useful,
11 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : * GNU Lesser General Public License for more details.
14 : *
15 : * You should have received a copy of the GNU Lesser General Public
16 : * License along with this program; if not, see <http://www.gnu.org/licenses/>.
17 : */
18 :
19 :
20 : /* According to RFC822 binary zeroes are allowed at many places. We do
21 : * not handle this correctly, especially in the field parsing code. It
22 : * should be easy to fix, and the API provides interfaces which
23 : * return the length but in addition make sure that returned strings
24 : * are always terminated by a \0.
25 : *
26 : * Furthermore, the case of field names is changed and thus it is not
27 : * always a good idea to use these modified header
28 : * lines (e.g. signatures may break).
29 : */
30 :
31 : #ifdef HAVE_CONFIG_H
32 : #include <config.h>
33 : #endif
34 :
35 : #include <stdlib.h>
36 : #include <stdio.h>
37 : #include <string.h>
38 : #include <errno.h>
39 : #include <stdarg.h>
40 : #include <assert.h>
41 :
42 : #include "rfc822parse.h"
43 :
44 : enum token_type
45 : {
46 : tSPACE,
47 : tATOM,
48 : tQUOTED,
49 : tDOMAINLIT,
50 : tSPECIAL
51 : };
52 :
53 : /* For now we directly use our TOKEN as the parse context */
54 : typedef struct rfc822parse_field_context *TOKEN;
55 : struct rfc822parse_field_context
56 : {
57 : TOKEN next;
58 : enum token_type type;
59 : struct {
60 : unsigned int cont:1;
61 : unsigned int lowered:1;
62 : } flags;
63 : /*TOKEN owner_pantry; */
64 : char data[1];
65 : };
66 :
67 : struct hdr_line
68 : {
69 : struct hdr_line *next;
70 : int cont; /* This is a continuation of the previous line. */
71 : unsigned char line[1];
72 : };
73 :
74 : typedef struct hdr_line *HDR_LINE;
75 :
76 :
77 : struct part
78 : {
79 : struct part *right; /* The next part. */
80 : struct part *down; /* A contained part. */
81 : HDR_LINE hdr_lines; /* Header lines os that part. */
82 : HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */
83 : char *boundary; /* Only used in the first part. */
84 : };
85 : typedef struct part *part_t;
86 :
87 : struct rfc822parse_context
88 : {
89 : rfc822parse_cb_t callback;
90 : void *callback_value;
91 : int callback_error;
92 : int in_body;
93 : int in_preamble; /* Wether we are before the first boundary. */
94 : part_t parts; /* The tree of parts. */
95 : part_t current_part; /* Whom we are processing (points into parts). */
96 : const char *boundary; /* Current boundary. */
97 : };
98 :
99 : static HDR_LINE find_header (rfc822parse_t msg, const char *name,
100 : int which, HDR_LINE * rprev);
101 :
102 :
/* Return the length of the LEN bytes at LINE with all trailing
   whitespace (space, tab, CR, LF) removed.  The buffer does not need
   to be NUL terminated.  A NUL byte is ordinary data and is never
   stripped; the previous strchr() based test matched the string
   terminator and thus treated NUL bytes like whitespace.  */
static size_t
length_sans_trailing_ws (const unsigned char *line, size_t len)
{
  /* Scan backwards: only the tail of the line is of interest.  */
  while (len)
    {
      unsigned char c = line[len - 1];

      if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
        break;
      len--;
    }
  return len;
}
124 :
125 :
/* Convert the ASCII letters of the NUL terminated STRING to lowercase
   in place.  The locale dependent ctype functions are not used.  */
static void
lowercase_string (unsigned char *string)
{
  unsigned char *p = string;

  while (*p)
    {
      if (*p >= 'A' && *p <= 'Z')
        *p += 'a' - 'A';
      p++;
    }
}
133 :
/* Bring a header name into the canonical capitalization, e.g.
   "Content-Type": the first letter of the name and every letter
   following a hyphen is uppercased, all other letters are lowercased.
   NAME is changed in place; processing ends at the first colon or at
   the end of the string.  Only ASCII letters are touched; the locale
   dependent ctype functions are not used.  */
static void
capitalize_header_name (unsigned char *name)
{
  unsigned char *p;
  int at_word_start = 1;

  for (p = name; *p != 0 && *p != ':'; p++)
    {
      unsigned char c = *p;

      if (c == '-')
        {
          at_word_start = 1;
          continue;
        }

      if (at_word_start)
        {
          if (c >= 'a' && c <= 'z')
            *p = c - ('a' - 'A');
          at_word_start = 0;
        }
      else if (c >= 'A' && c <= 'Z')
        *p = c + ('a' - 'A');
    }
}
155 :
#ifndef HAVE_STPCPY
/* Fallback for systems without stpcpy: copy the string B including
   its terminating NUL to A and return a pointer to the NUL written
   into A.  */
static char *
stpcpy (char *a,const char *b)
{
  for (; *b; a++, b++)
    *a = *b;
  *a = 0;

  return a;
}
#endif
167 :
168 :
169 : /* If a callback has been registerd, call it for the event of type
170 : EVENT. */
171 : static int
172 0 : do_callback (rfc822parse_t msg, rfc822parse_event_t event)
173 : {
174 : int rc;
175 :
176 0 : if (!msg->callback || msg->callback_error)
177 0 : return 0;
178 0 : rc = msg->callback (msg->callback_value, event, msg);
179 0 : if (rc)
180 0 : msg->callback_error = rc;
181 0 : return rc;
182 : }
183 :
184 : static part_t
185 0 : new_part (void)
186 : {
187 : part_t part;
188 :
189 0 : part = calloc (1, sizeof *part);
190 0 : if (part)
191 : {
192 0 : part->hdr_lines_tail = &part->hdr_lines;
193 : }
194 0 : return part;
195 : }
196 :
197 :
198 : static void
199 0 : release_part (part_t part)
200 : {
201 : part_t tmp;
202 : HDR_LINE hdr, hdr2;
203 :
204 0 : for (; part; part = tmp)
205 : {
206 0 : tmp = part->right;
207 0 : if (part->down)
208 0 : release_part (part->down);
209 0 : for (hdr = part->hdr_lines; hdr; hdr = hdr2)
210 : {
211 0 : hdr2 = hdr->next;
212 0 : free (hdr);
213 : }
214 0 : free (part->boundary);
215 0 : free (part);
216 : }
217 0 : }
218 :
219 :
220 : static void
221 0 : release_handle_data (rfc822parse_t msg)
222 : {
223 0 : release_part (msg->parts);
224 0 : msg->parts = NULL;
225 0 : msg->current_part = NULL;
226 0 : msg->boundary = NULL;
227 0 : }
228 :
229 :
230 : /* Create a new parsing context for an entire rfc822 message and
231 : return it. CB and CB_VALUE may be given to callback for certain
232 : events. NULL is returned on error with errno set appropriately. */
233 : rfc822parse_t
234 0 : rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
235 : {
236 0 : rfc822parse_t msg = calloc (1, sizeof *msg);
237 0 : if (msg)
238 : {
239 0 : msg->parts = msg->current_part = new_part ();
240 0 : if (!msg->parts)
241 : {
242 0 : free (msg);
243 0 : msg = NULL;
244 : }
245 : else
246 : {
247 0 : msg->callback = cb;
248 0 : msg->callback_value = cb_value;
249 0 : if (do_callback (msg, RFC822PARSE_OPEN))
250 : {
251 0 : release_handle_data (msg);
252 0 : free (msg);
253 0 : msg = NULL;
254 : }
255 : }
256 : }
257 0 : return msg;
258 : }
259 :
260 :
261 : void
262 0 : rfc822parse_cancel (rfc822parse_t msg)
263 : {
264 0 : if (msg)
265 : {
266 0 : do_callback (msg, RFC822PARSE_CANCEL);
267 0 : release_handle_data (msg);
268 0 : free (msg);
269 : }
270 0 : }
271 :
272 :
273 : void
274 0 : rfc822parse_close (rfc822parse_t msg)
275 : {
276 0 : if (msg)
277 : {
278 0 : do_callback (msg, RFC822PARSE_CLOSE);
279 0 : release_handle_data (msg);
280 0 : free (msg);
281 : }
282 0 : }
283 :
284 : static part_t
285 0 : find_parent (part_t tree, part_t target)
286 : {
287 : part_t part;
288 :
289 0 : for (part = tree->down; part; part = part->right)
290 : {
291 0 : if (part == target)
292 0 : return tree; /* Found. */
293 0 : if (part->down)
294 : {
295 0 : part_t tmp = find_parent (part, target);
296 0 : if (tmp)
297 0 : return tmp;
298 : }
299 : }
300 0 : return NULL;
301 : }
302 :
303 : static void
304 0 : set_current_part_to_parent (rfc822parse_t msg)
305 : {
306 : part_t parent;
307 :
308 0 : assert (msg->current_part);
309 0 : parent = find_parent (msg->parts, msg->current_part);
310 0 : if (!parent)
311 0 : return; /* Already at the top. */
312 :
313 : #ifndef NDEBUG
314 : {
315 : part_t part;
316 0 : for (part = parent->down; part; part = part->right)
317 0 : if (part == msg->current_part)
318 0 : break;
319 0 : assert (part);
320 : }
321 : #endif
322 0 : msg->current_part = parent;
323 :
324 0 : parent = find_parent (msg->parts, parent);
325 0 : msg->boundary = parent? parent->boundary: NULL;
326 : }
327 :
328 :
329 :
330 : /****************
331 : * We have read in all header lines and are about to receive the body
332 : * part. The delimiter line has already been processed.
333 : *
334 : * FIXME: we's better return an error in case of memory failures.
335 : */
336 : static int
337 0 : transition_to_body (rfc822parse_t msg)
338 : {
339 : rfc822parse_field_t ctx;
340 : int rc;
341 :
342 0 : rc = do_callback (msg, RFC822PARSE_T2BODY);
343 0 : if (!rc)
344 : {
345 : /* Store the boundary if we have multipart type. */
346 0 : ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
347 0 : if (ctx)
348 : {
349 : const char *s;
350 :
351 0 : s = rfc822parse_query_media_type (ctx, NULL);
352 0 : if (s && !strcmp (s,"multipart"))
353 : {
354 0 : s = rfc822parse_query_parameter (ctx, "boundary", 0);
355 0 : if (s)
356 : {
357 0 : assert (!msg->current_part->boundary);
358 0 : msg->current_part->boundary = malloc (strlen (s) + 1);
359 0 : if (msg->current_part->boundary)
360 : {
361 : part_t part;
362 :
363 0 : strcpy (msg->current_part->boundary, s);
364 0 : msg->boundary = msg->current_part->boundary;
365 0 : part = new_part ();
366 0 : if (!part)
367 : {
368 0 : int save_errno = errno;
369 0 : rfc822parse_release_field (ctx);
370 0 : errno = save_errno;
371 0 : return -1;
372 : }
373 0 : rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
374 0 : assert (!msg->current_part->down);
375 0 : msg->current_part->down = part;
376 0 : msg->current_part = part;
377 0 : msg->in_preamble = 1;
378 : }
379 : }
380 : }
381 0 : rfc822parse_release_field (ctx);
382 : }
383 : }
384 :
385 0 : return rc;
386 : }
387 :
388 : /* We have just passed a MIME boundary and need to prepare for new part.
389 : headers. */
390 : static int
391 0 : transition_to_header (rfc822parse_t msg)
392 : {
393 : part_t part;
394 :
395 0 : assert (msg->current_part);
396 0 : assert (!msg->current_part->right);
397 :
398 0 : part = new_part ();
399 0 : if (!part)
400 0 : return -1;
401 :
402 0 : msg->current_part->right = part;
403 0 : msg->current_part = part;
404 0 : return 0;
405 : }
406 :
407 :
408 : static int
409 0 : insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
410 : {
411 : HDR_LINE hdr;
412 :
413 0 : assert (msg->current_part);
414 0 : if (!length)
415 : {
416 0 : msg->in_body = 1;
417 0 : return transition_to_body (msg);
418 : }
419 :
420 0 : if (!msg->current_part->hdr_lines)
421 0 : do_callback (msg, RFC822PARSE_BEGIN_HEADER);
422 :
423 0 : length = length_sans_trailing_ws (line, length);
424 0 : hdr = malloc (sizeof (*hdr) + length);
425 0 : if (!hdr)
426 0 : return -1;
427 0 : hdr->next = NULL;
428 0 : hdr->cont = (*line == ' ' || *line == '\t');
429 0 : memcpy (hdr->line, line, length);
430 0 : hdr->line[length] = 0; /* Make it a string. */
431 :
432 : /* Transform a field name into canonical format. */
433 0 : if (!hdr->cont && strchr (line, ':'))
434 0 : capitalize_header_name (hdr->line);
435 :
436 0 : *msg->current_part->hdr_lines_tail = hdr;
437 0 : msg->current_part->hdr_lines_tail = &hdr->next;
438 :
439 : /* Lets help the caller to prevent mail loops and issue an event for
440 : * every Received header. */
441 0 : if (length >= 9 && !memcmp (line, "Received:", 9))
442 0 : do_callback (msg, RFC822PARSE_RCVD_SEEN);
443 0 : return 0;
444 : }
445 :
446 :
447 : /****************
448 : * Note: We handle the body transparent to allow binary zeroes in it.
449 : */
450 : static int
451 0 : insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
452 : {
453 0 : int rc = 0;
454 :
455 0 : if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary)
456 : {
457 0 : size_t blen = strlen (msg->boundary);
458 :
459 0 : if (length == blen + 2
460 0 : && !memcmp (line+2, msg->boundary, blen))
461 : {
462 0 : rc = do_callback (msg, RFC822PARSE_BOUNDARY);
463 0 : msg->in_body = 0;
464 0 : if (!rc && !msg->in_preamble)
465 0 : rc = transition_to_header (msg);
466 0 : msg->in_preamble = 0;
467 : }
468 0 : else if (length == blen + 4
469 0 : && line[length-2] =='-' && line[length-1] == '-'
470 0 : && !memcmp (line+2, msg->boundary, blen))
471 : {
472 0 : rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
473 0 : msg->boundary = NULL; /* No current boundary anymore. */
474 0 : set_current_part_to_parent (msg);
475 :
476 : /* Fixme: The next should actually be send right before the
477 : next boundary, so that we can mark the epilogue. */
478 0 : if (!rc)
479 0 : rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
480 : }
481 : }
482 0 : if (msg->in_preamble && !rc)
483 0 : rc = do_callback (msg, RFC822PARSE_PREAMBLE);
484 :
485 0 : return rc;
486 : }
487 :
488 : /* Insert the next line into the parser. Return 0 on success or true
489 : on error with errno set appropriately. */
490 : int
491 0 : rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
492 : {
493 0 : return (msg->in_body
494 : ? insert_body (msg, line, length)
495 0 : : insert_header (msg, line, length));
496 : }
497 :
498 :
499 : /* Tell the parser that we have finished the message. */
500 : int
501 0 : rfc822parse_finish (rfc822parse_t msg)
502 : {
503 0 : return do_callback (msg, RFC822PARSE_FINISH);
504 : }
505 :
506 :
507 :
508 : /****************
509 : * Get a copy of a header line. The line is returned as one long
510 : * string with LF to separate the continuation line. Caller must free
511 : * the return buffer. WHICH may be used to enumerate over all lines.
512 : * Wildcards are allowed. This function works on the current headers;
513 : * i.e. the regular mail headers or the MIME headers of the current
514 : * part.
515 : *
516 : * WHICH gives the mode:
517 : * -1 := Take the last occurrence
518 : * n := Take the n-th one.
519 : *
520 : * Returns a newly allocated buffer or NULL on error. errno is set in
521 : * case of a memory failure or set to 0 if the requested field is not
522 : * available.
523 : *
524 : * If VALUEOFF is not NULL it will receive the offset of the first non
525 : * space character in the value part of the line (i.e. after the first
526 : * colon).
527 : */
528 : char *
529 0 : rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
530 : size_t *valueoff)
531 : {
532 : HDR_LINE h, h2;
533 : char *buf, *p;
534 : size_t n;
535 :
536 0 : h = find_header (msg, name, which, NULL);
537 0 : if (!h)
538 : {
539 0 : errno = 0;
540 0 : return NULL; /* no such field */
541 : }
542 :
543 0 : n = strlen (h->line) + 1;
544 0 : for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
545 0 : n += strlen (h2->line) + 1;
546 :
547 0 : buf = p = malloc (n);
548 0 : if (buf)
549 : {
550 0 : p = stpcpy (p, h->line);
551 0 : *p++ = '\n';
552 0 : for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
553 : {
554 0 : p = stpcpy (p, h2->line);
555 0 : *p++ = '\n';
556 : }
557 0 : p[-1] = 0;
558 : }
559 :
560 0 : if (valueoff)
561 : {
562 0 : p = strchr (buf, ':');
563 0 : if (!p)
564 0 : *valueoff = 0; /* Oops: should never happen. */
565 : else
566 : {
567 0 : p++;
568 0 : while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
569 0 : p++;
570 0 : *valueoff = p - buf;
571 : }
572 : }
573 :
574 0 : return buf;
575 : }
576 :
577 :
578 : /****************
579 : * Enumerate all header. Caller has to provide the address of a pointer
580 : * which has to be initialzed to NULL, the caller should then never change this
581 : * pointer until he has closed the enumeration by passing again the address
582 : * of the pointer but with msg set to NULL.
583 : * The function returns pointers to all the header lines or NULL when
584 : * all lines have been enumerated or no headers are available.
585 : */
586 : const char *
587 0 : rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
588 : {
589 : HDR_LINE l;
590 :
591 0 : if (!msg) /* Close. */
592 0 : return NULL;
593 :
594 0 : if (*context == msg || !msg->current_part)
595 0 : return NULL;
596 :
597 0 : l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines;
598 :
599 0 : if (l)
600 : {
601 0 : *context = l->next ? (void *) (l->next) : (void *) msg;
602 0 : return l->line;
603 : }
604 0 : *context = msg; /* Mark end of list. */
605 0 : return NULL;
606 : }
607 :
608 :
609 :
610 : /****************
611 : * Find a header field. If the Name does end in an asterisk this is meant
612 : * to be a wildcard.
613 : *
614 : * which -1 : Retrieve the last field
615 : * >0 : Retrieve the n-th field
616 :
617 : * RPREV may be used to return the predecessor of the returned field;
618 : * which may be NULL for the very first one. It has to be initialzed
619 : * to either NULL in which case the search start at the first header line,
620 : * or it may point to a headerline, where the search should start
621 : */
622 : static HDR_LINE
623 0 : find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
624 : {
625 0 : HDR_LINE hdr, prev = NULL, mark = NULL;
626 : unsigned char *p;
627 : size_t namelen, n;
628 0 : int found = 0;
629 0 : int glob = 0;
630 :
631 0 : if (!msg->current_part)
632 0 : return NULL;
633 :
634 0 : namelen = strlen (name);
635 0 : if (namelen && name[namelen - 1] == '*')
636 : {
637 0 : namelen--;
638 0 : glob = 1;
639 : }
640 :
641 0 : hdr = msg->current_part->hdr_lines;
642 0 : if (rprev && *rprev)
643 : {
644 : /* spool forward to the requested starting place.
645 : * we cannot simply set this as we have to return
646 : * the previous list element too */
647 0 : for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
648 : ;
649 : }
650 :
651 0 : for (; hdr; prev = hdr, hdr = hdr->next)
652 : {
653 0 : if (hdr->cont)
654 0 : continue;
655 0 : if (!(p = strchr (hdr->line, ':')))
656 0 : continue; /* invalid header, just skip it. */
657 0 : n = p - hdr->line;
658 0 : if (!n)
659 0 : continue; /* invalid name */
660 0 : if ((glob ? (namelen <= n) : (namelen == n))
661 0 : && !memcmp (hdr->line, name, namelen))
662 : {
663 0 : found++;
664 0 : if (which == -1)
665 0 : mark = hdr;
666 0 : else if (found == which)
667 : {
668 0 : if (rprev)
669 0 : *rprev = prev;
670 0 : return hdr;
671 : }
672 : }
673 : }
674 0 : if (mark && rprev)
675 0 : *rprev = prev;
676 0 : return mark;
677 : }
678 :
679 :
680 :
/* Return a pointer to the first character of S which is not a space,
   tab, CR or LF.  This may be the terminating NUL.  */
static const char *
skip_ws (const char *s)
{
  return s + strspn (s, " \t\r\n");
}
688 :
689 :
690 : static void
691 0 : release_token_list (TOKEN t)
692 : {
693 0 : while (t)
694 : {
695 0 : TOKEN t2 = t->next;
696 : /* fixme: If we have owner_pantry, put the token back to
697 : * this pantry so that it can be reused later */
698 0 : free (t);
699 0 : t = t2;
700 : }
701 0 : }
702 :
703 :
704 : static TOKEN
705 0 : new_token (enum token_type type, const char *buf, size_t length)
706 : {
707 : TOKEN t;
708 :
709 : /* fixme: look through our pantries to find a suitable
710 : * token for reuse */
711 0 : t = malloc (sizeof *t + length);
712 0 : if (t)
713 : {
714 0 : t->next = NULL;
715 0 : t->type = type;
716 0 : memset (&t->flags, 0, sizeof (t->flags));
717 0 : t->data[0] = 0;
718 0 : if (buf)
719 : {
720 0 : memcpy (t->data, buf, length);
721 0 : t->data[length] = 0; /* Make sure it is a C string. */
722 : }
723 : else
724 0 : t->data[0] = 0;
725 : }
726 0 : return t;
727 : }
728 :
729 : static TOKEN
730 0 : append_to_token (TOKEN old, const char *buf, size_t length)
731 : {
732 0 : size_t n = strlen (old->data);
733 : TOKEN t;
734 :
735 0 : t = malloc (sizeof *t + n + length);
736 0 : if (t)
737 : {
738 0 : t->next = old->next;
739 0 : t->type = old->type;
740 0 : t->flags = old->flags;
741 0 : memcpy (t->data, old->data, n);
742 0 : memcpy (t->data + n, buf, length);
743 0 : t->data[n + length] = 0;
744 0 : old->next = NULL;
745 0 : release_token_list (old);
746 : }
747 0 : return t;
748 : }
749 :
750 :
751 :
752 : /*
753 : Parse a field into tokens as defined by rfc822.
754 : */
/* Split the field starting at the header line HDR, including all its
   continuation lines, into a list of tokens.  Comments in parentheses
   and whitespace produce no tokens; quoted strings and domain
   literals become one token each, with backslashes kept as found.
   Returns the head of the token list.  NULL is returned with errno
   cleared if HDR is NULL, if the line has no colon or an empty name,
   or if the field body yields no tokens; NULL is also returned if a
   token could not be allocated.  */
755 : static TOKEN
756 0 : parse_field (HDR_LINE hdr)
757 : {
/* The first table of each pair lists the characters ending an atom,
   the second one the characters returned as tSPECIAL tokens.  */
758 : static const char specials[] = "<>@.,;:\\[]\"()";
759 : static const char specials2[] = "<>@.,;:";
760 : static const char tspecials[] = "/?=<>@,;:\\[]\"()";
761 : static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really
762 : include '.'?*/
/* Fields which are parsed using the tspecials tables.  */
763 : static struct
764 : {
765 : const unsigned char *name;
766 : size_t namelen;
767 : } tspecial_header[] = {
768 : { "Content-Type", 12},
769 : { "Content-Transfer-Encoding", 25},
770 : { "Content-Disposition", 19},
771 : { NULL, 0}
772 : };
773 : const char *delimiters;
774 : const char *delimiters2;
775 : const unsigned char *line, *s, *s2;
776 : size_t n;
777 0 : int i, invalid = 0;
778 : TOKEN t, tok, *tok_tail;
779 :
780 0 : errno = 0;
781 0 : if (!hdr)
782 0 : return NULL;
783 :
784 0 : tok = NULL;
785 0 : tok_tail = &tok;
786 :
787 0 : line = hdr->line;
788 0 : if (!(s = strchr (line, ':')))
789 0 : return NULL; /* oops */
790 :
791 0 : n = s - line;
792 0 : if (!n)
793 0 : return NULL; /* oops: invalid name */
794 :
/* Select the delimiter tables by an exact, case sensitive match of
   the field name.  */
795 0 : delimiters = specials;
796 0 : delimiters2 = specials2;
797 0 : for (i = 0; tspecial_header[i].name; i++)
798 : {
799 0 : if (n == tspecial_header[i].namelen
800 0 : && !memcmp (line, tspecial_header[i].name, n))
801 : {
802 0 : delimiters = tspecials;
803 0 : delimiters2 = tspecials2;
804 0 : break;
805 : }
806 : }
807 :
808 0 : s++; /* Move over the colon. */
809 : for (;;)
810 : {
/* At the end of a line either proceed with the continuation line or
   return the tokens collected so far.  */
811 0 : while (!*s)
812 : {
813 0 : if (!hdr->next || !hdr->next->cont)
814 0 : return tok; /* Ready. */
815 :
816 : /* Next item is a header continuation line. */
817 0 : hdr = hdr->next;
818 0 : s = hdr->line;
819 : }
820 :
/* A comment: skip up to the matching closing parenthesis.  Nested
   comments are counted; parentheses inside a quoted string are not.  */
821 0 : if (*s == '(')
822 : {
823 0 : int level = 1;
824 0 : int in_quote = 0;
825 :
826 0 : invalid = 0;
827 0 : for (s++;; s++)
828 : {
829 0 : while (!*s)
830 : {
831 0 : if (!hdr->next || !hdr->next->cont)
832 : goto oparen_out;
833 : /* Next item is a header continuation line. */
834 0 : hdr = hdr->next;
835 0 : s = hdr->line;
836 : }
837 :
838 0 : if (in_quote)
839 : {
840 0 : if (*s == '\"')
841 0 : in_quote = 0;
842 0 : else if (*s == '\\' && s[1]) /* what about continuation? */
843 0 : s++;
844 : }
845 0 : else if (*s == ')')
846 : {
847 0 : if (!--level)
848 0 : break;
849 : }
850 0 : else if (*s == '(')
851 0 : level++;
852 0 : else if (*s == '\"')
853 0 : in_quote = 1;
854 0 : }
855 : oparen_out:
856 0 : if (!*s)
857 : ; /* Actually this is an error, but we don't care about it. */
858 : else
859 0 : s++;
860 : }
/* A quoted string or a domain literal: collect everything up to the
   terminator into one token, appending the text of continuation
   lines as needed.  */
861 0 : else if (*s == '\"' || *s == '[')
862 0 : {
863 : /* We do not check for non-allowed nesting of domainliterals */
864 0 : int term = *s == '\"' ? '\"' : ']';
865 0 : invalid = 0;
866 0 : s++;
867 0 : t = NULL;
868 :
869 : for (;;)
870 : {
871 0 : for (s2 = s; *s2; s2++)
872 : {
873 0 : if (*s2 == term)
874 0 : break;
875 0 : else if (*s2 == '\\' && s2[1]) /* what about continuation? */
876 0 : s2++;
877 : }
878 :
879 0 : t = (t
880 0 : ? append_to_token (t, s, s2 - s)
881 0 : : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
882 0 : if (!t)
883 0 : goto failure;
884 :
885 0 : if (*s2 || !hdr->next || !hdr->next->cont)
886 : break;
887 : /* Next item is a header continuation line. */
888 0 : hdr = hdr->next;
889 0 : s = hdr->line;
890 0 : }
891 0 : *tok_tail = t;
892 0 : tok_tail = &t->next;
893 0 : s = s2;
894 0 : if (*s)
895 0 : s++; /* skip the delimiter */
896 : }
897 0 : else if ((s2 = strchr (delimiters2, *s)))
898 : { /* Special characters which are not handled above. */
899 0 : invalid = 0;
900 0 : t = new_token (tSPECIAL, s, 1);
901 0 : if (!t)
902 0 : goto failure;
903 0 : *tok_tail = t;
904 0 : tok_tail = &t->next;
905 0 : s++;
906 : }
907 0 : else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
908 : {
909 0 : invalid = 0;
910 0 : s = skip_ws (s + 1);
911 : }
912 0 : else if (*s > 0x20 && !(*s & 128))
913 : { /* Atom. */
914 0 : invalid = 0;
915 0 : for (s2 = s + 1; *s2 > 0x20
916 0 : && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
917 : ;
918 0 : t = new_token (tATOM, s, s2 - s);
919 0 : if (!t)
920 0 : goto failure;
921 0 : *tok_tail = t;
922 0 : tok_tail = &t->next;
923 0 : s = s2;
924 : }
925 : else
926 : { /* Invalid character. */
/* A run of invalid characters yields only one tSPACE token; INVALID
   is reset by every other kind of input.  */
927 0 : if (!invalid)
928 : { /* For parsing we assume only one space. */
929 0 : t = new_token (tSPACE, NULL, 0);
930 0 : if (!t)
931 0 : goto failure;
932 0 : *tok_tail = t;
933 0 : tok_tail = &t->next;
934 0 : invalid = 1;
935 : }
936 0 : s++;
937 : }
938 0 : }
939 : /*NOTREACHED*/
940 :
/* Release the tokens linked so far while keeping the errno value.  */
941 : failure:
942 : {
943 0 : int save = errno;
944 0 : release_token_list (tok);
945 0 : errno = save;
946 : }
947 0 : return NULL;
948 : }
949 :
950 :
951 :
952 :
953 : /****************
954 : * Find and parse a header field.
955 : * WHICH indicates what to do if there are multiple instance of the same
956 : * field (like "Received"); the following value are defined:
957 : * -1 := Take the last occurrence
958 : * 0 := Reserved
959 : * n := Take the n-th one.
960 : * Returns a handle for further operations on the parse context of the field
961 : * or NULL if the field was not found.
962 : */
963 : rfc822parse_field_t
964 0 : rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
965 : {
966 : HDR_LINE hdr;
967 :
968 0 : if (!which)
969 0 : return NULL;
970 :
971 0 : hdr = find_header (msg, name, which, NULL);
972 0 : if (!hdr)
973 0 : return NULL;
974 0 : return parse_field (hdr);
975 : }
976 :
977 : void
978 0 : rfc822parse_release_field (rfc822parse_field_t ctx)
979 : {
980 0 : if (ctx)
981 0 : release_token_list (ctx);
982 0 : }
983 :
984 :
985 :
986 : /****************
987 : * Check whether T points to a parameter.
988 : * A parameter starts with a semicolon and it is assumed that t
989 : * points to exactly this one.
990 : */
991 : static int
992 0 : is_parameter (TOKEN t)
993 : {
994 0 : t = t->next;
995 0 : if (!t || t->type != tATOM)
996 0 : return 0;
997 0 : t = t->next;
998 0 : if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
999 0 : return 0;
1000 0 : t = t->next;
1001 0 : if (!t)
1002 0 : return 1; /* We assume that an non existing value is an empty one. */
1003 0 : return t->type == tQUOTED || t->type == tATOM;
1004 : }
1005 :
1006 : /*
1007 : Some header (Content-type) have a special syntax where attribute=value
1008 : pairs are used after a leading semicolon. The parse_field code
1009 : knows about these fields and changes the parsing to the one defined
1010 : in RFC2045.
1011 : Returns a pointer to the value which is valid as long as the
1012 : parse context is valid; NULL is returned in case that attr is not
1013 : defined in the header, a missing value is reppresented by an empty string.
1014 :
1015 : With LOWER_VALUE set to true, a matching field valuebe be
1016 : lowercased.
1017 :
1018 : Note, that ATTR should be lowercase.
1019 : */
1020 : const char *
1021 0 : rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
1022 : int lower_value)
1023 : {
1024 : TOKEN t, a;
1025 :
1026 0 : for (t = ctx; t; t = t->next)
1027 : {
1028 : /* skip to the next semicolon */
1029 0 : for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
1030 : ;
1031 0 : if (!t)
1032 0 : return NULL;
1033 0 : if (is_parameter (t))
1034 : { /* Look closer. */
1035 0 : a = t->next; /* We know that this is an atom */
1036 0 : if ( !a->flags.lowered )
1037 : {
1038 0 : lowercase_string (a->data);
1039 0 : a->flags.lowered = 1;
1040 : }
1041 0 : if (!strcmp (a->data, attr))
1042 : { /* found */
1043 0 : t = a->next->next;
1044 : /* Either T is now an atom, a quoted string or NULL in
1045 : * which case we return an empty string. */
1046 :
1047 0 : if ( lower_value && t && !t->flags.lowered )
1048 : {
1049 0 : lowercase_string (t->data);
1050 0 : t->flags.lowered = 1;
1051 : }
1052 0 : return t ? t->data : "";
1053 : }
1054 : }
1055 : }
1056 0 : return NULL;
1057 : }
1058 :
1059 : /****************
1060 : * This function may be used for the Content-Type header to figure out
1061 : * the media type and subtype. Note, that the returned strings are
1062 : * guaranteed to be lowercase as required by MIME.
1063 : *
1064 : * Returns: a pointer to the media type and if subtype is not NULL,
1065 : * a pointer to the subtype.
1066 : */
1067 : const char *
1068 0 : rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
1069 : {
1070 0 : TOKEN t = ctx;
1071 : const char *type;
1072 :
1073 0 : if (t->type != tATOM)
1074 0 : return NULL;
1075 0 : if (!t->flags.lowered)
1076 : {
1077 0 : lowercase_string (t->data);
1078 0 : t->flags.lowered = 1;
1079 : }
1080 0 : type = t->data;
1081 0 : t = t->next;
1082 0 : if (!t || t->type != tSPECIAL || t->data[0] != '/')
1083 0 : return NULL;
1084 0 : t = t->next;
1085 0 : if (!t || t->type != tATOM)
1086 0 : return NULL;
1087 :
1088 0 : if (subtype)
1089 : {
1090 0 : if (!t->flags.lowered)
1091 : {
1092 0 : lowercase_string (t->data);
1093 0 : t->flags.lowered = 1;
1094 : }
1095 0 : *subtype = t->data;
1096 : }
1097 0 : return type;
1098 : }
1099 :
1100 :
1101 :
1102 :
1103 :
1104 : #ifdef TESTING
1105 :
1106 : /* Internal debug function to print the structure of the message. */
1107 : static void
1108 : dump_structure (rfc822parse_t msg, part_t part, int indent)
1109 : {
1110 : if (!part)
1111 : {
1112 : printf ("*** Structure of this message:\n");
1113 : part = msg->parts;
1114 : }
1115 :
1116 : for (; part; part = part->right)
1117 : {
1118 : rfc822parse_field_t ctx;
1119 : part_t save_part; /* ugly hack - we should have a function to
1120 : get part information. */
1121 : const char *s;
1122 :
1123 : save_part = msg->current_part;
1124 : msg->current_part = part;
1125 : ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
1126 : msg->current_part = save_part;
1127 : if (ctx)
1128 : {
1129 : const char *s1, *s2;
1130 : s1 = rfc822parse_query_media_type (ctx, &s2);
1131 : if (s1)
1132 : printf ("*** %*s %s/%s", indent*2, "", s1, s2);
1133 : else
1134 : printf ("*** %*s [not found]", indent*2, "");
1135 :
1136 : s = rfc822parse_query_parameter (ctx, "boundary", 0);
1137 : if (s)
1138 : printf (" (boundary=\"%s\")", s);
1139 : rfc822parse_release_field (ctx);
1140 : }
1141 : else
1142 : printf ("*** %*s text/plain [assumed]", indent*2, "");
1143 : putchar('\n');
1144 :
1145 : if (part->down)
1146 : dump_structure (msg, part->down, indent + 1);
1147 : }
1148 :
1149 : }
1150 :
1151 :
1152 :
1153 : static void
1154 : show_param (rfc822parse_field_t ctx, const char *name)
1155 : {
1156 : const char *s;
1157 :
1158 : if (!ctx)
1159 : return;
1160 : s = rfc822parse_query_parameter (ctx, name, 0);
1161 : if (s)
1162 : printf ("*** %s: '%s'\n", name, s);
1163 : }
1164 :
1165 :
1166 :
1167 : static void
1168 : show_event (rfc822parse_event_t event)
1169 : {
1170 : const char *s;
1171 :
1172 : switch (event)
1173 : {
1174 : case RFC822PARSE_OPEN: s= "Open"; break;
1175 : case RFC822PARSE_CLOSE: s= "Close"; break;
1176 : case RFC822PARSE_CANCEL: s= "Cancel"; break;
1177 : case RFC822PARSE_T2BODY: s= "T2Body"; break;
1178 : case RFC822PARSE_FINISH: s= "Finish"; break;
1179 : case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
1180 : case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break;
1181 : case RFC822PARSE_LEVEL_UP: s= "Level_Up"; break;
1182 : case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
1183 : case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
1184 : case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break;
1185 : case RFC822PARSE_PREAMBLE: s= "Preamble"; break;
1186 : case RFC822PARSE_EPILOGUE: s= "Epilogue"; break;
1187 : default: s= "***invalid event***"; break;
1188 : }
1189 : printf ("*** got RFC822 event %s\n", s);
1190 : }
1191 :
1192 : static int
1193 : msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
1194 : {
1195 : show_event (event);
1196 : if (event == RFC822PARSE_T2BODY)
1197 : {
1198 : rfc822parse_field_t ctx;
1199 : void *ectx;
1200 : const char *line;
1201 :
1202 : for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
1203 : {
1204 : printf ("*** HDR: %s\n", line);
1205 : }
1206 : rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */
1207 :
1208 : ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
1209 : if (ctx)
1210 : {
1211 : const char *s1, *s2;
1212 : s1 = rfc822parse_query_media_type (ctx, &s2);
1213 : if (s1)
1214 : printf ("*** media: '%s/%s'\n", s1, s2);
1215 : else
1216 : printf ("*** media: [not found]\n");
1217 : show_param (ctx, "boundary");
1218 : show_param (ctx, "protocol");
1219 : rfc822parse_release_field (ctx);
1220 : }
1221 : else
1222 : printf ("*** media: text/plain [assumed]\n");
1223 :
1224 : }
1225 :
1226 :
1227 : return 0;
1228 : }
1229 :
1230 :
1231 :
1232 : int
1233 : main (int argc, char **argv)
1234 : {
1235 : char line[5000];
1236 : size_t length;
1237 : rfc822parse_t msg;
1238 :
1239 : msg = rfc822parse_open (msg_cb, NULL);
1240 : if (!msg)
1241 : abort ();
1242 :
1243 : while (fgets (line, sizeof (line), stdin))
1244 : {
1245 : length = strlen (line);
1246 : if (length && line[length - 1] == '\n')
1247 : line[--length] = 0;
1248 : if (length && line[length - 1] == '\r')
1249 : line[--length] = 0;
1250 : if (rfc822parse_insert (msg, line, length))
1251 : abort ();
1252 : }
1253 :
1254 : dump_structure (msg, NULL, 0);
1255 :
1256 : rfc822parse_close (msg);
1257 : return 0;
1258 : }
1259 : #endif
1260 :
1261 : /*
1262 : Local Variables:
1263 : compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
1264 : End:
1265 : */
|