LCOV - code coverage report
Current view: top level - tools - rfc822parse.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 460 0.0 %
Date: 2016-09-12 13:01:59 Functions: 0 31 0.0 %

          Line data    Source code
       1             : /* rfc822parse.c - Simple mail and MIME parser
       2             :  *      Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
       3             :  *      Copyright (C) 2003, 2004 g10 Code GmbH
       4             :  *
       5             :  * This program is free software; you can redistribute it and/or
       6             :  * modify it under the terms of the GNU Lesser General Public License
       7             :  * as published by the Free Software Foundation; either version 3 of
       8             :  * the License, or (at your option) any later version.
       9             :  *
      10             :  * This program is distributed in the hope that it will be useful,
      11             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      13             :  * GNU Lesser General Public License for more details.
      14             :  *
      15             :  * You should have received a copy of the GNU Lesser General Public
      16             :  * License along with this program; if not, see <http://www.gnu.org/licenses/>.
      17             :  */
      18             : 
      19             : 
      20             : /* According to RFC822 binary zeroes are allowed at many places. We do
      21             :  * not handle this correctly, especially in the field parsing code.  It
      22             :  * should be easy to fix and the API provides an interface which
      23             :  * returns the length but in addition makes sure that returned strings
      24             :  * are always ended by a \0.
      25             :  *
      26             :  * Furthermore, the case of field names is changed and thus it is not
      27             :  * always a good idea to use these modified header
      28             :  * lines (e.g. signatures may break).
      29             :  */
      30             : 
      31             : #ifdef HAVE_CONFIG_H
      32             : #include <config.h>
      33             : #endif
      34             : 
      35             : #include <stdlib.h>
      36             : #include <stdio.h>
      37             : #include <string.h>
      38             : #include <errno.h>
      39             : #include <stdarg.h>
      40             : #include <assert.h>
      41             : 
      42             : #include "rfc822parse.h"
      43             : 
      44             : enum token_type  /* Kind of a token; stored in the TYPE member of a TOKEN. */
      45             :   {
      46             :     tSPACE,
      47             :     tATOM,
      48             :     tQUOTED,
      49             :     tDOMAINLIT,
      50             :     tSPECIAL
      51             :   };
      52             : 
      53             : /* For now we directly use our TOKEN as the parse context. */
      54             : typedef struct rfc822parse_field_context *TOKEN;
      55             : struct rfc822parse_field_context
      56             : {
      57             :   TOKEN next;             /* Next token of the list or NULL. */
      58             :   enum token_type type;   /* Kind of this token. */
      59             :   struct {
      60             :     unsigned int cont:1;
      61             :     unsigned int lowered:1;
      62             :   } flags;                /* NOTE(review): the users of these flags are not visible in this section. */
      63             :   /*TOKEN owner_pantry; */
      64             :   char data[1];           /* Token text; new_token allocates LENGTH extra bytes behind it. */
      65             : };
      66             : 
      67             : struct hdr_line   /* One stored header line of a part. */
      68             : {
      69             :   struct hdr_line *next;  /* Next line of the list or NULL. */
      70             :   int cont;     /* This is a continuation of the previous line. */
      71             :   unsigned char line[1];  /* NUL terminated text; insert_header allocates the needed extra bytes. */
      72             : };
      73             : 
      74             : typedef struct hdr_line *HDR_LINE;
      75             : 
      76             : 
      77             : struct part
      78             : {
      79             :   struct part *right;     /* The next part. */
      80             :   struct part *down;      /* A contained part. */
      81             :   HDR_LINE hdr_lines;       /* Header lines of that part. */
      82             :   HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */
      83             :   char *boundary;           /* Only used in the first part. */
      84             : };
      85             : typedef struct part *part_t;
      86             : 
      87             : struct rfc822parse_context
      88             : {
      89             :   rfc822parse_cb_t callback;  /* Event callback or NULL. */
      90             :   void *callback_value;       /* Passed as first argument to CALLBACK. */
      91             :   int callback_error;         /* First non-zero value returned by CALLBACK. */
      92             :   int in_body;                /* Set when the empty delimiter line has been seen. */
      93             :   int in_preamble;      /* Whether we are before the first boundary. */
      94             :   part_t parts;         /* The tree of parts. */
      95             :   part_t current_part;  /* Whom we are processing (points into parts). */
      96             :   const char *boundary; /* Current boundary. */
      97             : };
      98             : 
      99             : static HDR_LINE find_header (rfc822parse_t msg, const char *name,
     100             :                              int which, HDR_LINE * rprev);
     101             : 
     102             : 
     /* Return the length of the LEN bytes at LINE after discounting all
        trailing blanks, tabs, CRs and LFs.  A NUL byte at the end counts
        as trailing whitespace as well.  */
     static size_t
     length_sans_trailing_ws (const unsigned char *line, size_t len)
     {
       size_t end = len;

       /* Walk backwards until the first byte which has to be kept.  */
       while (end)
         {
           unsigned char c = line[end - 1];

           if (c != ' ' && c != '\t' && c != '\r' && c != '\n' && c != '\0')
             break;
           end--;
         }

       return end;
     }
     124             : 
     125             : 
     /* Convert all ASCII upper case letters of the NUL terminated STRING
        to lower case; the string is modified in place.  */
     static void
     lowercase_string (unsigned char *string)
     {
       unsigned char *p = string;

       while (*p)
         {
           unsigned char c = *p;

           if (c >= 'A' && c <= 'Z')
             *p = (unsigned char)(c - 'A' + 'a');
           p++;
         }
     }
     133             : 
     /* Bring the header name at the start of NAME into the usual
        capitalized form, e.g. "Content-Type": the first character and
        every character following a '-' is made upper case, all others
        lower case.  Processing ends at the first colon or at the end of
        the string.  Only ASCII letters are changed; the locale dependent
        ctype.h functions are not used.  */
     static void
     capitalize_header_name (unsigned char *name)
     {
       int at_word_start = 1;
       unsigned char *p;

       for (p = name; *p && *p != ':'; p++)
         {
           unsigned char c = *p;

           if (c == '-')
             {
               at_word_start = 1;
               continue;
             }

           if (at_word_start)
             {
               if (c >= 'a' && c <= 'z')
                 *p = (unsigned char)(c - 'a' + 'A');
               at_word_start = 0;
               continue;
             }

           if (c >= 'A' && c <= 'Z')
             *p = (unsigned char)(c - 'A' + 'a');
         }
     }
     155             : 
     #ifndef HAVE_STPCPY
     /* Replacement for systems without stpcpy: copy the string B including
        its terminating NUL to A and return the address of that NUL in A.  */
     static char *
     stpcpy (char *a, const char *b)
     {
       for (; *b; a++, b++)
         *a = *b;
       *a = '\0';

       return a;
     }
     #endif
     167             : 
     168             : 
     169             : /* If a callback has been registerd, call it for the event of type
     170             :    EVENT. */
     171             : static int
     172           0 : do_callback (rfc822parse_t msg, rfc822parse_event_t event)
     173             : {
     174             :   int rc;
     175             : 
     176           0 :   if (!msg->callback || msg->callback_error)
     177           0 :     return 0;
     178           0 :   rc = msg->callback (msg->callback_value, event, msg);
     179           0 :   if (rc)
     180           0 :     msg->callback_error = rc;
     181           0 :   return rc;
     182             : }
     183             : 
     184             : static part_t
     185           0 : new_part (void)
     186             : {
     187             :   part_t part;
     188             : 
     189           0 :   part = calloc (1, sizeof *part);
     190           0 :   if (part)
     191             :     {
     192           0 :       part->hdr_lines_tail = &part->hdr_lines;
     193             :     }
     194           0 :   return part;
     195             : }
     196             : 
     197             : 
     198             : static void
     199           0 : release_part (part_t part)
     200             : {
     201             :   part_t tmp;
     202             :   HDR_LINE hdr, hdr2;
     203             : 
     204           0 :   for (; part; part = tmp)
     205             :     {
     206           0 :       tmp = part->right;
     207           0 :       if (part->down)
     208           0 :         release_part (part->down);
     209           0 :       for (hdr = part->hdr_lines; hdr; hdr = hdr2)
     210             :         {
     211           0 :           hdr2 = hdr->next;
     212           0 :           free (hdr);
     213             :         }
     214           0 :       free (part->boundary);
     215           0 :       free (part);
     216             :     }
     217           0 : }
     218             : 
     219             : 
     220             : static void
     221           0 : release_handle_data (rfc822parse_t msg)
     222             : {
     223           0 :   release_part (msg->parts);
     224           0 :   msg->parts = NULL;
     225           0 :   msg->current_part = NULL;
     226           0 :   msg->boundary = NULL;
     227           0 : }
     228             : 
     229             : 
     230             : /* Create a new parsing context for an entire rfc822 message and
     231             :    return it.  CB and CB_VALUE may be given to callback for certain
     232             :    events.  NULL is returned on error with errno set appropriately. */
     233             : rfc822parse_t
     234           0 : rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
     235             : {
     236           0 :   rfc822parse_t msg = calloc (1, sizeof *msg);
     237           0 :   if (msg)
     238             :     {
     239           0 :       msg->parts = msg->current_part = new_part ();
     240           0 :       if (!msg->parts)
     241             :         {
     242           0 :           free (msg);
     243           0 :           msg = NULL;
     244             :         }
     245             :       else
     246             :         {
     247           0 :           msg->callback = cb;
     248           0 :           msg->callback_value = cb_value;
     249           0 :           if (do_callback (msg, RFC822PARSE_OPEN))
     250             :             {
     251           0 :               release_handle_data (msg);
     252           0 :               free (msg);
     253           0 :               msg = NULL;
     254             :             }
     255             :         }
     256             :     }
     257           0 :   return msg;
     258             : }
     259             : 
     260             : 
     261             : void
     262           0 : rfc822parse_cancel (rfc822parse_t msg)
     263             : {
     264           0 :   if (msg)
     265             :     {
     266           0 :       do_callback (msg, RFC822PARSE_CANCEL);
     267           0 :       release_handle_data (msg);
     268           0 :       free (msg);
     269             :     }
     270           0 : }
     271             : 
     272             : 
     273             : void
     274           0 : rfc822parse_close (rfc822parse_t msg)
     275             : {
     276           0 :   if (msg)
     277             :     {
     278           0 :       do_callback (msg, RFC822PARSE_CLOSE);
     279           0 :       release_handle_data (msg);
     280           0 :       free (msg);
     281             :     }
     282           0 : }
     283             : 
     284             : static part_t
     285           0 : find_parent (part_t tree, part_t target)
     286             : {
     287             :   part_t part;
     288             : 
     289           0 :   for (part = tree->down; part; part = part->right)
     290             :     {
     291           0 :       if (part == target)
     292           0 :         return tree; /* Found. */
     293           0 :       if (part->down)
     294             :         {
     295           0 :           part_t tmp = find_parent (part, target);
     296           0 :           if (tmp)
     297           0 :             return tmp;
     298             :         }
     299             :     }
     300           0 :   return NULL;
     301             : }
     302             : 
     303             : static void
     304           0 : set_current_part_to_parent (rfc822parse_t msg)
     305             : {
     306             :   part_t parent;
     307             : 
     308           0 :   assert (msg->current_part);
     309           0 :   parent = find_parent (msg->parts, msg->current_part);
     310           0 :   if (!parent)
     311           0 :     return; /* Already at the top. */
     312             : 
     313             : #ifndef NDEBUG
     314             :   {
     315             :     part_t part;
     316           0 :     for (part = parent->down; part; part = part->right)
     317           0 :       if (part == msg->current_part)
     318           0 :         break;
     319           0 :     assert (part);
     320             :   }
     321             : #endif
     322           0 :   msg->current_part = parent;
     323             : 
     324           0 :   parent = find_parent (msg->parts, parent);
     325           0 :   msg->boundary = parent? parent->boundary: NULL;
     326             : }
     327             : 
     328             : 
     329             : 
     330             : /****************
     331             :  * We have read in all header lines and are about to receive the body
     332             :  * part.  The delimiter line has already been processed.
     333             :  *
     334             :  * FIXME: we's better return an error in case of memory failures.
     335             :  */
     336             : static int
     337           0 : transition_to_body (rfc822parse_t msg)
     338             : {
     339             :   rfc822parse_field_t ctx;
     340             :   int rc;
     341             : 
     342           0 :   rc = do_callback (msg, RFC822PARSE_T2BODY);
     343           0 :   if (!rc)
     344             :     {
     345             :       /* Store the boundary if we have multipart type. */
     346           0 :       ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
     347           0 :       if (ctx)
     348             :         {
     349             :           const char *s;
     350             : 
     351           0 :           s = rfc822parse_query_media_type (ctx, NULL);
     352           0 :           if (s && !strcmp (s,"multipart"))
     353             :             {
     354           0 :               s = rfc822parse_query_parameter (ctx, "boundary", 0);
     355           0 :               if (s)
     356             :                 {
     357           0 :                   assert (!msg->current_part->boundary);
     358           0 :                   msg->current_part->boundary = malloc (strlen (s) + 1);
     359           0 :                   if (msg->current_part->boundary)
     360             :                     {
     361             :                       part_t part;
     362             : 
     363           0 :                       strcpy (msg->current_part->boundary, s);
     364           0 :                       msg->boundary = msg->current_part->boundary;
     365           0 :                       part = new_part ();
     366           0 :                       if (!part)
     367             :                         {
     368           0 :                           int save_errno = errno;
     369           0 :                           rfc822parse_release_field (ctx);
     370           0 :                           errno = save_errno;
     371           0 :                           return -1;
     372             :                         }
     373           0 :                       rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
     374           0 :                       assert (!msg->current_part->down);
     375           0 :                       msg->current_part->down = part;
     376           0 :                       msg->current_part = part;
     377           0 :                       msg->in_preamble = 1;
     378             :                     }
     379             :                 }
     380             :             }
     381           0 :           rfc822parse_release_field (ctx);
     382             :         }
     383             :     }
     384             : 
     385           0 :   return rc;
     386             : }
     387             : 
     388             : /* We have just passed a MIME boundary and need to prepare for new part.
     389             :    headers. */
     390             : static int
     391           0 : transition_to_header (rfc822parse_t msg)
     392             : {
     393             :   part_t part;
     394             : 
     395           0 :   assert (msg->current_part);
     396           0 :   assert (!msg->current_part->right);
     397             : 
     398           0 :   part = new_part ();
     399           0 :   if (!part)
     400           0 :     return -1;
     401             : 
     402           0 :   msg->current_part->right = part;
     403           0 :   msg->current_part = part;
     404           0 :   return 0;
     405             : }
     406             : 
     407             : 
     408             : static int
     409           0 : insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
     410             : {
     411             :   HDR_LINE hdr;
     412             : 
     413           0 :   assert (msg->current_part);
     414           0 :   if (!length)
     415             :     {
     416           0 :       msg->in_body = 1;
     417           0 :       return transition_to_body (msg);
     418             :     }
     419             : 
     420           0 :   if (!msg->current_part->hdr_lines)
     421           0 :     do_callback (msg, RFC822PARSE_BEGIN_HEADER);
     422             : 
     423           0 :   length = length_sans_trailing_ws (line, length);
     424           0 :   hdr = malloc (sizeof (*hdr) + length);
     425           0 :   if (!hdr)
     426           0 :     return -1;
     427           0 :   hdr->next = NULL;
     428           0 :   hdr->cont = (*line == ' ' || *line == '\t');
     429           0 :   memcpy (hdr->line, line, length);
     430           0 :   hdr->line[length] = 0; /* Make it a string. */
     431             : 
     432             :   /* Transform a field name into canonical format. */
     433           0 :   if (!hdr->cont && strchr (line, ':'))
     434           0 :      capitalize_header_name (hdr->line);
     435             : 
     436           0 :   *msg->current_part->hdr_lines_tail = hdr;
     437           0 :   msg->current_part->hdr_lines_tail = &hdr->next;
     438             : 
     439             :   /* Lets help the caller to prevent mail loops and issue an event for
     440             :    * every Received header. */
     441           0 :   if (length >= 9 && !memcmp (line, "Received:", 9))
     442           0 :      do_callback (msg, RFC822PARSE_RCVD_SEEN);
     443           0 :   return 0;
     444             : }
     445             : 
     446             : 
     447             : /****************
     448             :  * Note: We handle the body transparent to allow binary zeroes in it.
     449             :  */
     450             : static int
     451           0 : insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
     452             : {
     453           0 :   int rc = 0;
     454             : 
     455           0 :   if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary)
     456             :     {
     457           0 :       size_t blen = strlen (msg->boundary);
     458             : 
     459           0 :       if (length == blen + 2
     460           0 :           && !memcmp (line+2, msg->boundary, blen))
     461             :         {
     462           0 :           rc = do_callback (msg, RFC822PARSE_BOUNDARY);
     463           0 :           msg->in_body = 0;
     464           0 :           if (!rc && !msg->in_preamble)
     465           0 :             rc = transition_to_header (msg);
     466           0 :           msg->in_preamble = 0;
     467             :         }
     468           0 :       else if (length == blen + 4
     469           0 :           && line[length-2] =='-' && line[length-1] == '-'
     470           0 :           && !memcmp (line+2, msg->boundary, blen))
     471             :         {
     472           0 :           rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
     473           0 :           msg->boundary = NULL; /* No current boundary anymore. */
     474           0 :           set_current_part_to_parent (msg);
     475             : 
     476             :           /* Fixme: The next should actually be send right before the
     477             :              next boundary, so that we can mark the epilogue. */
     478           0 :           if (!rc)
     479           0 :             rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
     480             :         }
     481             :     }
     482           0 :   if (msg->in_preamble && !rc)
     483           0 :     rc = do_callback (msg, RFC822PARSE_PREAMBLE);
     484             : 
     485           0 :   return rc;
     486             : }
     487             : 
     488             : /* Insert the next line into the parser. Return 0 on success or true
     489             :    on error with errno set appropriately. */
     490             : int
     491           0 : rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
     492             : {
     493           0 :   return (msg->in_body
     494             :           ? insert_body (msg, line, length)
     495           0 :           : insert_header (msg, line, length));
     496             : }
     497             : 
     498             : 
     499             : /* Tell the parser that we have finished the message. */
     500             : int
     501           0 : rfc822parse_finish (rfc822parse_t msg)
     502             : {
     503           0 :   return do_callback (msg, RFC822PARSE_FINISH);
     504             : }
     505             : 
     506             : 
     507             : 
     508             : /****************
     509             :  * Get a copy of a header line. The line is returned as one long
     510             :  * string with LF to separate the continuation line. Caller must free
     511             :  * the return buffer.  WHICH may be used to enumerate over all lines.
     512             :  * Wildcards are allowed.  This function works on the current headers;
     513             :  * i.e. the regular mail headers or the MIME headers of the current
     514             :  * part.
     515             :  *
     516             :  * WHICH gives the mode:
     517             :  *  -1 := Take the last occurrence
     518             :  *   n := Take the n-th  one.
     519             :  *
     520             :  * Returns a newly allocated buffer or NULL on error.  errno is set in
     521             :  * case of a memory failure or set to 0 if the requested field is not
     522             :  * available.
     523             :  *
     524             :  * If VALUEOFF is not NULL it will receive the offset of the first non
     525             :  * space character in the value part of the line (i.e. after the first
     526             :  * colon).
     527             :  */
     528             : char *
     529           0 : rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
     530             :                        size_t *valueoff)
     531             : {
     532             :   HDR_LINE h, h2;
     533             :   char *buf, *p;
     534             :   size_t n;
     535             : 
     536           0 :   h = find_header (msg, name, which, NULL);
     537           0 :   if (!h)
     538             :     {
     539           0 :       errno = 0;
     540           0 :       return NULL; /* no such field */
     541             :     }
     542             : 
     543           0 :   n = strlen (h->line) + 1;
     544           0 :   for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
     545           0 :     n += strlen (h2->line) + 1;
     546             : 
     547           0 :   buf = p = malloc (n);
     548           0 :   if (buf)
     549             :     {
     550           0 :       p = stpcpy (p, h->line);
     551           0 :       *p++ = '\n';
     552           0 :       for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
     553             :         {
     554           0 :           p = stpcpy (p, h2->line);
     555           0 :           *p++ = '\n';
     556             :         }
     557           0 :       p[-1] = 0;
     558             :     }
     559             : 
     560           0 :   if (valueoff)
     561             :     {
     562           0 :       p = strchr (buf, ':');
     563           0 :       if (!p)
     564           0 :         *valueoff = 0; /* Oops: should never happen. */
     565             :       else
     566             :         {
     567           0 :           p++;
     568           0 :           while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
     569           0 :             p++;
     570           0 :           *valueoff = p - buf;
     571             :         }
     572             :     }
     573             : 
     574           0 :   return buf;
     575             : }
     576             : 
     577             : 
     578             : /****************
     579             :  * Enumerate all header.  Caller has to provide the address of a pointer
     580             :  * which has to be initialzed to NULL, the caller should then never change this
     581             :  * pointer until he has closed the enumeration by passing again the address
     582             :  * of the pointer but with msg set to NULL.
     583             :  * The function returns pointers to all the header lines or NULL when
     584             :  * all lines have been enumerated or no headers are available.
     585             :  */
     586             : const char *
     587           0 : rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
     588             : {
     589             :   HDR_LINE l;
     590             : 
     591           0 :   if (!msg) /* Close. */
     592           0 :     return NULL;
     593             : 
     594           0 :   if (*context == msg || !msg->current_part)
     595           0 :     return NULL;
     596             : 
     597           0 :   l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines;
     598             : 
     599           0 :   if (l)
     600             :     {
     601           0 :       *context = l->next ? (void *) (l->next) : (void *) msg;
     602           0 :       return l->line;
     603             :     }
     604           0 :   *context = msg; /* Mark end of list. */
     605           0 :   return NULL;
     606             : }
     607             : 
     608             : 
     609             : 
     610             : /****************
     611             :  * Find a header field.  If the Name does end in an asterisk this is meant
     612             :  * to be a wildcard.
     613             :  *
     614             :  *  which  -1 : Retrieve the last field
     615             :  *         >0 : Retrieve the n-th field
     616             : 
     617             :  * RPREV may be used to return the predecessor of the returned field;
     618             :  * which may be NULL for the very first one. It has to be initialzed
     619             :  * to either NULL in which case the search start at the first header line,
     620             :  * or it may point to a headerline, where the search should start
     621             :  */
     622             : static HDR_LINE
     623           0 : find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
     624             : {
     625           0 :   HDR_LINE hdr, prev = NULL, mark = NULL;
     626             :   unsigned char *p;
     627             :   size_t namelen, n;
     628           0 :   int found = 0;
     629           0 :   int glob = 0;
     630             : 
     631           0 :   if (!msg->current_part)
     632           0 :     return NULL;
     633             : 
     634           0 :   namelen = strlen (name);
     635           0 :   if (namelen && name[namelen - 1] == '*')
     636             :     {
     637           0 :       namelen--;
     638           0 :       glob = 1;
     639             :     }
     640             : 
     641           0 :   hdr = msg->current_part->hdr_lines;
     642           0 :   if (rprev && *rprev)
     643             :     {
     644             :       /* spool forward to the requested starting place.
     645             :        * we cannot simply set this as we have to return
     646             :        * the previous list element too */
     647           0 :       for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
     648             :         ;
     649             :     }
     650             : 
     651           0 :   for (; hdr; prev = hdr, hdr = hdr->next)
     652             :     {
     653           0 :       if (hdr->cont)
     654           0 :         continue;
     655           0 :       if (!(p = strchr (hdr->line, ':')))
     656           0 :         continue;               /* invalid header, just skip it. */
     657           0 :       n = p - hdr->line;
     658           0 :       if (!n)
     659           0 :         continue;               /* invalid name */
     660           0 :       if ((glob ? (namelen <= n) : (namelen == n))
     661           0 :           && !memcmp (hdr->line, name, namelen))
     662             :         {
     663           0 :           found++;
     664           0 :           if (which == -1)
     665           0 :             mark = hdr;
     666           0 :           else if (found == which)
     667             :             {
     668           0 :               if (rprev)
     669           0 :                 *rprev = prev;
     670           0 :               return hdr;
     671             :             }
     672             :         }
     673             :     }
     674           0 :   if (mark && rprev)
     675           0 :     *rprev = prev;
     676           0 :   return mark;
     677             : }
     678             : 
     679             : 
     680             : 
     681             : static const char *
     682           0 : skip_ws (const char *s)
     683             : {
     684           0 :   while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
     685           0 :     s++;
     686           0 :   return s;
     687             : }
     688             : 
     689             : 
     690             : static void
     691           0 : release_token_list (TOKEN t)
     692             : {
     693           0 :   while (t)
     694             :     {
     695           0 :       TOKEN t2 = t->next;
     696             :       /* fixme: If we have owner_pantry, put the token back to
     697             :        * this pantry so that it can be reused later */
     698           0 :       free (t);
     699           0 :       t = t2;
     700             :     }
     701           0 : }
     702             : 
     703             : 
     704             : static TOKEN
     705           0 : new_token (enum token_type type, const char *buf, size_t length)
     706             : {
     707             :   TOKEN t;
     708             : 
     709             :   /* fixme: look through our pantries to find a suitable
     710             :    * token for reuse */
     711           0 :   t = malloc (sizeof *t + length);
     712           0 :   if (t)
     713             :     {
     714           0 :       t->next = NULL;
     715           0 :       t->type = type;
     716           0 :       memset (&t->flags, 0, sizeof (t->flags));
     717           0 :       t->data[0] = 0;
     718           0 :       if (buf)
     719             :         {
     720           0 :           memcpy (t->data, buf, length);
     721           0 :           t->data[length] = 0;       /* Make sure it is a C string. */
     722             :         }
     723             :       else
     724           0 :         t->data[0] = 0;
     725             :     }
     726           0 :   return t;
     727             : }
     728             : 
     729             : static TOKEN
     730           0 : append_to_token (TOKEN old, const char *buf, size_t length)
     731             : {
     732           0 :   size_t n = strlen (old->data);
     733             :   TOKEN t;
     734             : 
     735           0 :   t = malloc (sizeof *t + n + length);
     736           0 :   if (t)
     737             :     {
     738           0 :       t->next = old->next;
     739           0 :       t->type = old->type;
     740           0 :       t->flags = old->flags;
     741           0 :       memcpy (t->data, old->data, n);
     742           0 :       memcpy (t->data + n, buf, length);
     743           0 :       t->data[n + length] = 0;
     744           0 :       old->next = NULL;
     745           0 :       release_token_list (old);
     746             :     }
     747           0 :   return t;
     748             : }
     749             : 
     750             : 
     751             : 
     752             : /*
     753             :    Parse a field into tokens as defined by rfc822.
     754             :  */
     755             : static TOKEN
     756           0 : parse_field (HDR_LINE hdr)
     757             : {
     758             :   static const char specials[] = "<>@.,;:\\[]\"()";
     759             :   static const char specials2[] = "<>@.,;:";
     760             :   static const char tspecials[] = "/?=<>@,;:\\[]\"()";
     761             :   static const char tspecials2[] = "/?=<>@.,;:";  /* FIXME: really
     762             :                                                      include '.'?*/
     763             :   static struct
     764             :   {
     765             :     const unsigned char *name;
     766             :     size_t namelen;
     767             :   } tspecial_header[] = {
     768             :     { "Content-Type", 12},
     769             :     { "Content-Transfer-Encoding", 25},
     770             :     { "Content-Disposition", 19},
     771             :     { NULL, 0}
     772             :   };
     773             :   const char *delimiters;
     774             :   const char *delimiters2;
     775             :   const unsigned char *line, *s, *s2;
     776             :   size_t n;
     777           0 :   int i, invalid = 0;
     778             :   TOKEN t, tok, *tok_tail;
     779             : 
     780           0 :   errno = 0;
     781           0 :   if (!hdr)
     782           0 :     return NULL;
     783             : 
     784           0 :   tok = NULL;
     785           0 :   tok_tail = &tok;
     786             : 
     787           0 :   line = hdr->line;
     788           0 :   if (!(s = strchr (line, ':')))
     789           0 :     return NULL; /* oops */
     790             : 
     791           0 :   n = s - line;
     792           0 :   if (!n)
     793           0 :     return NULL; /* oops: invalid name */
     794             : 
     795           0 :   delimiters = specials;
     796           0 :   delimiters2 = specials2;
     797           0 :   for (i = 0; tspecial_header[i].name; i++)
     798             :     {
     799           0 :       if (n == tspecial_header[i].namelen
     800           0 :           && !memcmp (line, tspecial_header[i].name, n))
     801             :         {
     802           0 :           delimiters = tspecials;
     803           0 :           delimiters2 = tspecials2;
     804           0 :           break;
     805             :         }
     806             :     }
     807             : 
     808           0 :   s++; /* Move over the colon. */
     809             :   for (;;)
     810             :     {
     811           0 :       while (!*s)
     812             :         {
     813           0 :           if (!hdr->next || !hdr->next->cont)
     814           0 :             return tok; /* Ready.  */
     815             : 
     816             :           /* Next item is a header continuation line.  */
     817           0 :           hdr = hdr->next;
     818           0 :           s = hdr->line;
     819             :         }
     820             : 
     821           0 :       if (*s == '(')
     822             :         {
     823           0 :           int level = 1;
     824           0 :           int in_quote = 0;
     825             : 
     826           0 :           invalid = 0;
     827           0 :           for (s++;; s++)
     828             :             {
     829           0 :               while (!*s)
     830             :                 {
     831           0 :                   if (!hdr->next || !hdr->next->cont)
     832             :                     goto oparen_out;
     833             :                   /* Next item is a header continuation line.  */
     834           0 :                   hdr = hdr->next;
     835           0 :                   s = hdr->line;
     836             :                 }
     837             : 
     838           0 :               if (in_quote)
     839             :                 {
     840           0 :                   if (*s == '\"')
     841           0 :                     in_quote = 0;
     842           0 :                   else if (*s == '\\' && s[1])  /* what about continuation? */
     843           0 :                     s++;
     844             :                 }
     845           0 :               else if (*s == ')')
     846             :                 {
     847           0 :                   if (!--level)
     848           0 :                     break;
     849             :                 }
     850           0 :               else if (*s == '(')
     851           0 :                 level++;
     852           0 :               else if (*s == '\"')
     853           0 :                 in_quote = 1;
     854           0 :             }
     855             :         oparen_out:
     856           0 :           if (!*s)
     857             :             ; /* Actually this is an error, but we don't care about it. */
     858             :           else
     859           0 :             s++;
     860             :         }
     861           0 :       else if (*s == '\"' || *s == '[')
     862           0 :         {
     863             :           /* We do not check for non-allowed nesting of domainliterals */
     864           0 :           int term = *s == '\"' ? '\"' : ']';
     865           0 :           invalid = 0;
     866           0 :           s++;
     867           0 :           t = NULL;
     868             : 
     869             :           for (;;)
     870             :             {
     871           0 :               for (s2 = s; *s2; s2++)
     872             :                 {
     873           0 :                   if (*s2 == term)
     874           0 :                     break;
     875           0 :                   else if (*s2 == '\\' && s2[1]) /* what about continuation? */
     876           0 :                     s2++;
     877             :                 }
     878             : 
     879           0 :               t = (t
     880           0 :                    ? append_to_token (t, s, s2 - s)
     881           0 :                    : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
     882           0 :               if (!t)
     883           0 :                 goto failure;
     884             : 
     885           0 :               if (*s2 || !hdr->next || !hdr->next->cont)
     886             :                 break;
     887             :               /* Next item is a header continuation line.  */
     888           0 :               hdr = hdr->next;
     889           0 :               s = hdr->line;
     890           0 :             }
     891           0 :           *tok_tail = t;
     892           0 :           tok_tail = &t->next;
     893           0 :           s = s2;
     894           0 :           if (*s)
     895           0 :             s++; /* skip the delimiter */
     896             :         }
     897           0 :       else if ((s2 = strchr (delimiters2, *s)))
     898             :         { /* Special characters which are not handled above. */
     899           0 :           invalid = 0;
     900           0 :           t = new_token (tSPECIAL, s, 1);
     901           0 :           if (!t)
     902           0 :             goto failure;
     903           0 :           *tok_tail = t;
     904           0 :           tok_tail = &t->next;
     905           0 :           s++;
     906             :         }
     907           0 :       else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
     908             :         {
     909           0 :           invalid = 0;
     910           0 :           s = skip_ws (s + 1);
     911             :         }
     912           0 :       else if (*s > 0x20 && !(*s & 128))
     913             :         { /* Atom. */
     914           0 :           invalid = 0;
     915           0 :           for (s2 = s + 1; *s2 > 0x20
     916           0 :                && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
     917             :             ;
     918           0 :           t = new_token (tATOM, s, s2 - s);
     919           0 :           if (!t)
     920           0 :             goto failure;
     921           0 :           *tok_tail = t;
     922           0 :           tok_tail = &t->next;
     923           0 :           s = s2;
     924             :         }
     925             :       else
     926             :         { /* Invalid character. */
     927           0 :           if (!invalid)
     928             :             { /* For parsing we assume only one space. */
     929           0 :               t = new_token (tSPACE, NULL, 0);
     930           0 :               if (!t)
     931           0 :                 goto failure;
     932           0 :               *tok_tail = t;
     933           0 :               tok_tail = &t->next;
     934           0 :               invalid = 1;
     935             :             }
     936           0 :           s++;
     937             :         }
     938           0 :     }
     939             :   /*NOTREACHED*/
     940             : 
     941             :  failure:
     942             :   {
     943           0 :     int save = errno;
     944           0 :     release_token_list (tok);
     945           0 :     errno = save;
     946             :   }
     947           0 :   return NULL;
     948             : }
     949             : 
     950             : 
     951             : 
     952             : 
     953             : /****************
     954             :  * Find and parse a header field.
     955             :  * WHICH indicates what to do if there are multiple instance of the same
     956             :  * field (like "Received"); the following value are defined:
     957             :  *  -1 := Take the last occurrence
     958             :  *   0 := Reserved
     959             :  *   n := Take the n-th one.
     960             :  * Returns a handle for further operations on the parse context of the field
     961             :  * or NULL if the field was not found.
     962             :  */
     963             : rfc822parse_field_t
     964           0 : rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
     965             : {
     966             :   HDR_LINE hdr;
     967             : 
     968           0 :   if (!which)
     969           0 :     return NULL;
     970             : 
     971           0 :   hdr = find_header (msg, name, which, NULL);
     972           0 :   if (!hdr)
     973           0 :     return NULL;
     974           0 :   return parse_field (hdr);
     975             : }
     976             : 
     977             : void
     978           0 : rfc822parse_release_field (rfc822parse_field_t ctx)
     979             : {
     980           0 :   if (ctx)
     981           0 :     release_token_list (ctx);
     982           0 : }
     983             : 
     984             : 
     985             : 
     986             : /****************
     987             :  * Check whether T points to a parameter.
     988             :  * A parameter starts with a semicolon and it is assumed that t
     989             :  * points to exactly this one.
     990             :  */
     991             : static int
     992           0 : is_parameter (TOKEN t)
     993             : {
     994           0 :   t = t->next;
     995           0 :   if (!t || t->type != tATOM)
     996           0 :     return 0;
     997           0 :   t = t->next;
     998           0 :   if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
     999           0 :     return 0;
    1000           0 :   t = t->next;
    1001           0 :   if (!t)
    1002           0 :     return 1; /* We assume that an non existing value is an empty one. */
    1003           0 :   return t->type == tQUOTED || t->type == tATOM;
    1004             : }
    1005             : 
    1006             : /*
    1007             :    Some header (Content-type) have a special syntax where attribute=value
    1008             :    pairs are used after a leading semicolon.  The parse_field code
    1009             :    knows about these fields and changes the parsing to the one defined
    1010             :    in RFC2045.
    1011             :    Returns a pointer to the value which is valid as long as the
    1012             :    parse context is valid; NULL is returned in case that attr is not
    1013             :    defined in the header, a missing value is reppresented by an empty string.
    1014             : 
    1015             :    With LOWER_VALUE set to true, a matching field valuebe be
    1016             :    lowercased.
    1017             : 
    1018             :    Note, that ATTR should be lowercase.
    1019             :  */
    1020             : const char *
    1021           0 : rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
    1022             :                              int lower_value)
    1023             : {
    1024             :   TOKEN t, a;
    1025             : 
    1026           0 :   for (t = ctx; t; t = t->next)
    1027             :     {
    1028             :       /* skip to the next semicolon */
    1029           0 :       for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
    1030             :         ;
    1031           0 :       if (!t)
    1032           0 :         return NULL;
    1033           0 :       if (is_parameter (t))
    1034             :         { /* Look closer. */
    1035           0 :           a = t->next; /* We know that this is an atom */
    1036           0 :           if ( !a->flags.lowered )
    1037             :             {
    1038           0 :               lowercase_string (a->data);
    1039           0 :               a->flags.lowered = 1;
    1040             :             }
    1041           0 :           if (!strcmp (a->data, attr))
    1042             :             { /* found */
    1043           0 :               t = a->next->next;
    1044             :               /* Either T is now an atom, a quoted string or NULL in
    1045             :                * which case we return an empty string. */
    1046             : 
    1047           0 :               if ( lower_value && t && !t->flags.lowered )
    1048             :                 {
    1049           0 :                   lowercase_string (t->data);
    1050           0 :                   t->flags.lowered = 1;
    1051             :                 }
    1052           0 :               return t ? t->data : "";
    1053             :             }
    1054             :         }
    1055             :     }
    1056           0 :   return NULL;
    1057             : }
    1058             : 
    1059             : /****************
    1060             :  * This function may be used for the Content-Type header to figure out
    1061             :  * the media type and subtype.  Note, that the returned strings are
    1062             :  * guaranteed to be lowercase as required by MIME.
    1063             :  *
    1064             :  * Returns: a pointer to the media type and if subtype is not NULL,
    1065             :  *          a pointer to the subtype.
    1066             :  */
    1067             : const char *
    1068           0 : rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
    1069             : {
    1070           0 :   TOKEN t = ctx;
    1071             :   const char *type;
    1072             : 
    1073           0 :   if (t->type != tATOM)
    1074           0 :     return NULL;
    1075           0 :   if (!t->flags.lowered)
    1076             :     {
    1077           0 :       lowercase_string (t->data);
    1078           0 :       t->flags.lowered = 1;
    1079             :     }
    1080           0 :   type = t->data;
    1081           0 :   t = t->next;
    1082           0 :   if (!t || t->type != tSPECIAL || t->data[0] != '/')
    1083           0 :     return NULL;
    1084           0 :   t = t->next;
    1085           0 :   if (!t || t->type != tATOM)
    1086           0 :     return NULL;
    1087             : 
    1088           0 :   if (subtype)
    1089             :     {
    1090           0 :       if (!t->flags.lowered)
    1091             :         {
    1092           0 :           lowercase_string (t->data);
    1093           0 :           t->flags.lowered = 1;
    1094             :         }
    1095           0 :       *subtype = t->data;
    1096             :     }
    1097           0 :   return type;
    1098             : }
    1099             : 
    1100             : 
    1101             : 
    1102             : 
    1103             : 
    1104             : #ifdef TESTING
    1105             : 
    1106             : /* Internal debug function to print the structure of the message. */
    1107             : static void
    1108             : dump_structure (rfc822parse_t msg, part_t part, int indent)
    1109             : {
    1110             :   if (!part)
    1111             :     {
    1112             :       printf ("*** Structure of this message:\n");
    1113             :       part = msg->parts;
    1114             :     }
    1115             : 
    1116             :   for (; part; part = part->right)
    1117             :     {
    1118             :       rfc822parse_field_t ctx;
    1119             :       part_t save_part; /* ugly hack - we should have a function to
    1120             :                            get part information. */
    1121             :       const char *s;
    1122             : 
    1123             :       save_part = msg->current_part;
    1124             :       msg->current_part = part;
    1125             :       ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
    1126             :       msg->current_part = save_part;
    1127             :       if (ctx)
    1128             :         {
    1129             :           const char *s1, *s2;
    1130             :           s1 = rfc822parse_query_media_type (ctx, &s2);
    1131             :           if (s1)
    1132             :             printf ("***   %*s %s/%s", indent*2, "", s1, s2);
    1133             :           else
    1134             :             printf ("***   %*s [not found]", indent*2, "");
    1135             : 
    1136             :           s = rfc822parse_query_parameter (ctx, "boundary", 0);
    1137             :           if (s)
    1138             :             printf (" (boundary=\"%s\")", s);
    1139             :           rfc822parse_release_field (ctx);
    1140             :         }
    1141             :       else
    1142             :         printf ("***   %*s text/plain [assumed]", indent*2, "");
    1143             :       putchar('\n');
    1144             : 
    1145             :       if (part->down)
    1146             :         dump_structure (msg, part->down, indent + 1);
    1147             :     }
    1148             : 
    1149             : }
    1150             : 
    1151             : 
    1152             : 
    1153             : static void
    1154             : show_param (rfc822parse_field_t ctx, const char *name)
    1155             : {
    1156             :   const char *s;
    1157             : 
    1158             :   if (!ctx)
    1159             :     return;
    1160             :   s = rfc822parse_query_parameter (ctx, name, 0);
    1161             :   if (s)
    1162             :     printf ("***   %s: '%s'\n", name, s);
    1163             : }
    1164             : 
    1165             : 
    1166             : 
    1167             : static void
    1168             : show_event (rfc822parse_event_t event)
    1169             : {
    1170             :   const char *s;
    1171             : 
    1172             :   switch (event)
    1173             :     {
    1174             :     case RFC822PARSE_OPEN: s= "Open"; break;
    1175             :     case RFC822PARSE_CLOSE: s= "Close"; break;
    1176             :     case RFC822PARSE_CANCEL: s= "Cancel"; break;
    1177             :     case RFC822PARSE_T2BODY: s= "T2Body"; break;
    1178             :     case RFC822PARSE_FINISH: s= "Finish"; break;
    1179             :     case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
    1180             :     case RFC822PARSE_LEVEL_DOWN: s= "Level_Down"; break;
    1181             :     case RFC822PARSE_LEVEL_UP:   s= "Level_Up"; break;
    1182             :     case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
    1183             :     case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
    1184             :     case RFC822PARSE_BEGIN_HEADER: s= "Begin_Header"; break;
    1185             :     case RFC822PARSE_PREAMBLE: s= "Preamble"; break;
    1186             :     case RFC822PARSE_EPILOGUE: s= "Epilogue"; break;
    1187             :     default: s= "***invalid event***"; break;
    1188             :     }
    1189             :   printf ("*** got RFC822 event %s\n", s);
    1190             : }
    1191             : 
    1192             : static int
    1193             : msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
    1194             : {
    1195             :   show_event (event);
    1196             :   if (event == RFC822PARSE_T2BODY)
    1197             :     {
    1198             :       rfc822parse_field_t ctx;
    1199             :       void *ectx;
    1200             :       const char *line;
    1201             : 
    1202             :       for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
    1203             :         {
    1204             :           printf ("*** HDR: %s\n", line);
    1205             :         }
    1206             :       rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */
    1207             : 
    1208             :       ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
    1209             :       if (ctx)
    1210             :         {
    1211             :           const char *s1, *s2;
    1212             :           s1 = rfc822parse_query_media_type (ctx, &s2);
    1213             :           if (s1)
    1214             :             printf ("***   media: '%s/%s'\n", s1, s2);
    1215             :           else
    1216             :             printf ("***   media: [not found]\n");
    1217             :           show_param (ctx, "boundary");
    1218             :           show_param (ctx, "protocol");
    1219             :           rfc822parse_release_field (ctx);
    1220             :         }
    1221             :       else
    1222             :         printf ("***   media: text/plain [assumed]\n");
    1223             : 
    1224             :     }
    1225             : 
    1226             : 
    1227             :   return 0;
    1228             : }
    1229             : 
    1230             : 
    1231             : 
    1232             : int
    1233             : main (int argc, char **argv)
    1234             : {
    1235             :   char line[5000];
    1236             :   size_t length;
    1237             :   rfc822parse_t msg;
    1238             : 
    1239             :   msg = rfc822parse_open (msg_cb, NULL);
    1240             :   if (!msg)
    1241             :     abort ();
    1242             : 
    1243             :   while (fgets (line, sizeof (line), stdin))
    1244             :     {
    1245             :       length = strlen (line);
    1246             :       if (length && line[length - 1] == '\n')
    1247             :         line[--length] = 0;
    1248             :       if (length && line[length - 1] == '\r')
    1249             :         line[--length] = 0;
    1250             :       if (rfc822parse_insert (msg, line, length))
    1251             :         abort ();
    1252             :     }
    1253             : 
    1254             :   dump_structure (msg, NULL, 0);
    1255             : 
    1256             :   rfc822parse_close (msg);
    1257             :   return 0;
    1258             : }
    1259             : #endif
    1260             : 
    1261             : /*
    1262             : Local Variables:
    1263             : compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
    1264             : End:
    1265             : */

Generated by: LCOV version 1.11