/*
 * This module handles parsing request content as form data, building
 * a symbol table of name/value pairs via a caller-supplied callback function.
 *
 * Author:   David Jones
 * Date:     12-FEB-2000
 * Revised:  24-MAR-2001	Define special symbols for .filename attribute.
 * Revised:  25-JAN-2002	Cleanup lineset structure used for parsing
 *				multipart content.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <descrip.h>
/*
 * Declare local structures:
 *    sym_table_def		table entry for header line attributes
 *    line_set			Tracks logical lines within content.
 *    line_body			Individual line range.
 */
/*
 * One attribute produced by parse_attributes().  Both members point at
 * NUL-terminated strings that are not owned by the entry: either into the
 * string that was scanned or at a literal "".  An entry whose name is ""
 * marks the end of a table.
 */
struct sym_table_def {
    char *name, *value;
};
/*
 * Describes one logical line inside a content buffer.
 */
struct line_body {
    /* First octet of the line, i.e. &data[offset] of the owning line_set. */
    char *text;
    /* Number of octets in the line, line terminator not included. */
    int length;
    /* Index of the line's first octet within the owning line_set's data. */
    int offset;
};
/*
 * Growable list of the lines found in a content buffer.
 */
struct line_set {
    int count;			/* number lines defined */
    /* Number of line_body slots currently allocated in line[]. */
    int body_alloc;
    /* Array of line descriptors; the first count entries are in use. */
    struct line_body *line;
    /* Buffer the line descriptors refer to (not owned by the set). */
    char *data;
};

/*
 * INTEGER is the type of the caller's context argument.  The routines in
 * this file only hand it on to the define_symbol callback.
 */
typedef int *INTEGER;
/*
 * Forward declarations for static routines that are used before they
 * are defined.
 */
static char * unescape_string ( char *string, int *length );
static void parse_multipart_form ( int *context,
	char *clause, char *content, int content_length,
	int define_symbol ( INTEGER, char *, int, char *, int ) );
/****************************************************************************/
/*
 * Main routine for generating symbol table from form data.  Note that
 * data pointed to by content_type and fdata may be modified by this function.
 *
 * Arguments:
 *	context		Passed unchanged to define_symbol.
 *	content_type	Content type string.  It is compared against
 *			upper-case prefixes, so it must already be upcased.
 *	fdata		Request content.  Modified in place.
 *	fdata_len	Number of octets in fdata.
 *	define_symbol	Callback invoked as (context, name, name length,
 *			value, value length) for every form field found.
 *
 * Return value:
 *	1	Content type was recognized and the data was scanned.
 *	20	Content type is not one of the supported form encodings.
 */
int hpss_parse_form_content ( INTEGER context, char *content_type,
	char *fdata, int fdata_len,
	int define_symbol ( INTEGER, char *, int, char *, int ) )
{
    /*
     * Examine content type.
     */
    if ( strncmp(content_type, "APPLICATION/X-WWW-FORM-URLENCODED", 33) == 0) {
	int start, i, j, k, length, name_len, value_len;
	/*
	 * Easy case, parse the data.  First split along '&' and then sub-divide
	 * on the '=' into the name and value.  A single trailing '&' is
	 * ignored; the length test keeps empty content from reading fdata[-1].
	 */
	length = fdata_len;
	if ( length < 0 ) length = 0;
	if ( (length > 0) && (fdata[length-1] == '&') ) length = length - 1;
	start = 0;
	for ( i = 0; i <= length; i++ ) {
	    /*
	     * A field ends at '&' or at end of data.  Test the index first
	     * so fdata[length] is never examined.
	     */
	    if ( (i < length) && (fdata[i] != '&') ) continue;
	    /*
	     * fdata[start..i-1] is one raw field.  Locate the first literal
	     * '=' before unescaping so that an encoded '=' (%3D) inside
	     * the name is not mistaken for the delimiter.
	     */
	    for ( j = start; (j < i) && (fdata[j] != '='); j++ ) {
	    }
	    name_len = j - start;
	    value_len = (j < i) ? (i - j - 1) : 0;
#ifndef KEEP_PLUSES
	    for ( k = start; k < i; k++ ) 
		if ( fdata[k] == '+' ) fdata[k] = ' ';
#endif
	    /*
	     * Unescape the name, make it upcase and turn '-' into '_'.
	     */
	    unescape_string ( &fdata[start], &name_len );
	    for ( k = start; k < (start + name_len); k++ ) {
		fdata[k] = toupper ( (unsigned char) fdata[k] );
		if ( fdata[k] == '-' ) fdata[k] = '_';
	    }
	    if ( j < i ) {
		/*
		 * fdata[start..start+name_len-1] is name, value starts at
		 * fdata[j+1].  Fields without '=' define no symbol.
		 */
		unescape_string ( &fdata[j+1], &value_len );
		if ( 1&define_symbol(context, &fdata[start], name_len,
			&fdata[j+1], value_len ) ) {
		} else {
		    /* callback reported failure; remaining fields still scanned */
		}
	    }
	    start = i+1;
	}
    } else if ( strncmp(content_type, "MULTIPART/FORM-DATA", 19 ) == 0 ) {
	char *clause;
	/*
	 * New form type allows sending files,  clause string will
	 * contain attributes (namely the boundary tag).
	 */
	clause = strchr ( content_type, ';' );
	if ( !clause ) clause = ""; else clause++;
	parse_multipart_form ( context, clause, fdata, fdata_len, 
		define_symbol );

    } else {
	return 20;
    }
    return 1;
}
/**************************************************************************/
/*  The following group of routines manage line_set structures.
 */
/*
 * Prepare a line_set for use.
 *
 * Arguments:
 *	set		Structure to initialize.
 *	initial_alloc	Number of line slots to allocate up front.
 *	data		Buffer the lines will refer to.
 *
 * On return set->line is either a valid allocation of set->body_alloc
 * slots or NULL with set->body_alloc zero (nothing requested, or the
 * allocation failed), so the structure is never left half-initialized.
 */
static void init_lineset ( struct line_set *set, int initial_alloc,
	char *data )
{
    set->line = NULL;
    set->body_alloc = 0;
    if ( initial_alloc > 0 ) {
	set->line = malloc ( sizeof(struct line_body) * initial_alloc );
	if ( set->line ) set->body_alloc = initial_alloc;
    }
    set->count = 0;
    set->data = data;
}

/*
 * Add a line descriptor to the end of a line_set, growing the array
 * when it is full.
 *
 * Arguments:
 *	offset		Index of the line's first octet within set->data.
 *	length		Length of the line.
 *	set		Set to extend.
 *
 * If the array cannot be grown the existing lines are kept and the new
 * line is dropped, rather than losing the array and writing through NULL.
 */
static void append_line ( int offset, int length, struct line_set *set )
{
    if ( set->count >= set->body_alloc ) {
	int new_alloc;
	struct line_body *grown;
	new_alloc = set->body_alloc*2 + 100;
	/*
	 * set->line is only meaningful once slots have been allocated,
	 * so an empty set starts with a fresh allocation.
	 */
	if ( set->body_alloc > 0 ) grown = realloc ( set->line,
		sizeof(struct line_body) * new_alloc );
	else grown = malloc ( sizeof(struct line_body) * new_alloc );
	if ( !grown ) return;
	set->line = grown;
	set->body_alloc = new_alloc;
    }
    set->line[set->count].text = &set->data[offset];
    set->line[set->count].length = length;
    set->line[set->count].offset = offset;
    set->count++;
}
static struct line_set *parse_lines ( char *data, int length )
{
    struct line_set *set;
    int i, start, state;
    char octet;
    set = (struct line_set *) malloc ( sizeof(struct line_set) );
    if ( !set ) return set;
    init_lineset ( set, (length/40) + 20, data );

    state = 0;
    start = 0;
    for ( i = 0; i < length; i++ ) {
	octet = data[i];
	if ( state == 0 ) {
	    if ( octet == '\r' ) state = 1;
	    else if ( octet == '\n' ) {
		append_line ( start, i - start, set );
		start = i + 1;
	    }
	} else if ( state == 1 ) {
	    if ( octet == '\n' ) {
		append_line ( start, i - start - 1, set );
		start = i + 1;
		state = 0;
	    } else if ( octet != '\r' ) state = 0;
	}
    }
    if ( i != start ) {		/* missing final delimiter */
	append_line ( start, i - start, set );
    }
    return set;
}
/*
 * Release a line_set obtained from parse_lines: the line array first
 * (only when slots were allocated), then the structure itself.
 */
static void delete_lineset ( struct line_set *set )
{
    if ( set->body_alloc > 0 ) {
	free ( set->line );
    }
    set->count = 0;
    set->body_alloc = 0;
    free ( set );
}
/**************************************************************************/
/* Convert escaped characters in string to actual values.
 *
 * A '%' followed by two hexadecimal digits, all within the given length,
 * is replaced by the single octet they encode.  A '%' that is not
 * followed by two hex digits inside the string is copied unchanged, so
 * nothing beyond string[*length - 1] is ever examined.
 *
 * Arguments:
 * 	string		Character string.  Modified.
 *	length		Int.  On input, original length of string.
 *			On output, final length of unescaped string.
 *
 * Return value is pointer to the string edited (same as argument).
 */
static int hex_digit_value ( char digit )
{
    /* Value of one hexadecimal digit, or -1 if not a hex digit. */
    if ( (digit >= '0') && (digit <= '9') ) return digit - '0';
    if ( (digit >= 'a') && (digit <= 'f') ) return digit - 'a' + 10;
    if ( (digit >= 'A') && (digit <= 'F') ) return digit - 'A' + 10;
    return -1;
}

static char * unescape_string ( char *string, int *length )
{
    int in, out, high, low, val;
    /*
     * Scan string, compacting it in place; out never runs ahead of in.
     */
    for ( in = out = 0; in < *length; in++ ) {
	if ( (string[in] == '%') && ((in + 2) < *length) ) {
	    high = hex_digit_value ( string[in+1] );
	    low = hex_digit_value ( string[in+2] );
	    if ( (high >= 0) && (low >= 0) ) {
		/*
		 * Valid escape, replace all three octets with single byte.
		 */
		val = high*16 + low;
		if ( val > 127 ) val -= 256;	/* Sign extend */
		string[out++] = (char) val;
		in += 2;
		continue;
	    }
	}
	string[out++] = string[in];
    }
    *length = out;
    return string;
}
/**************************************************************************/
/* Scan string for name=value; or name="value"; and build symbol table.
 *
 * Names are upcased in place and both names and values are NUL-terminated
 * in place, so list is modified and the table entries point into it.
 * An attribute without '=' (or with nothing after the '=') gets an empty
 * value.  The entry following the last attribute has name "" as
 * end-of-table marker.
 *
 * Arguments:
 *	list		NUL-terminated attribute string.  Modified.
 *	table		Receives the attributes.
 *	table_size	Number of entries in table; one is reserved for the
 *			end marker, so at most table_size-1 attributes
 *			are stored.
 *
 * Return value:
 *	Number of attributes stored.
 *
 * Scanner states:
 *	0   skipping separators before a name
 *	1   inside name (or trailing blanks after it)
 *	2   after '=', looking for start of value
 *	3   inside quoted value
 *	4   after closing quote, looking for ';'
 *	5   inside unquoted value
 */
static int parse_attributes ( char *list, struct sym_table_def *table,
	int table_size )
{
    int i, state, length, size;
    char cur, *name, *value;

    if ( table_size < 2 ) {
	/* No room for any attribute plus the end marker. */
	if ( table_size == 1 ) table[0].name = "";
	return 0;
    }
    length = strlen(list);
    state = 0;
    size = 0;
    name = value = "";
    for ( i = 0; i < length; i++ ) {
	cur = list[i];
	if ( state == 0 ) {
	    if ( cur == ';' ) continue;
	    if ( cur == ' ' ) continue;
	    if ( cur == '\t' ) continue;
	    name = &list[i];
	    list[i] = toupper ( (unsigned char) cur );
	    state = 1;			/* look for name end */
	} else if ( state == 1 ) {
	    if ( (cur == ' ') || (cur == '\t') ) {
		list[i] = '\0';
		continue;
	    } else if ( cur == ';' ) {
		/* attribute with no value. */
		state = 0;
		table[size].name = name;
		table[size].value = "";
		list[i] = '\0';
		size++;
		if ( (size+1) >= table_size ) break;
	    } else if ( cur == '=' ) {
		list[i] = '\0';
		value = &list[i+1];
		state = 2;
	    } else {
		list[i] = toupper ( (unsigned char) cur );
	    }
	} else if ( state == 2 ) {
	    if ( (cur == ' ') || (cur == '\t') ) continue;
	    else if ( cur == '"' ) {
		state = 3;
		value = &list[i+1];
	    } else if ( cur == ';' ) {
		/*
		 * Nothing between '=' and ';': store an empty value so
		 * the next attribute is not swallowed into this one.
		 */
		list[i] = '\0';
		state = 0;
		table[size].name = name;
		table[size].value = &list[i];
		size++;
		if ( (size+1) >= table_size ) break;
	    } else {
		/* Unquoted value starts here, after any blanks. */
		value = &list[i];
		state = 5;
	    }
	} else if ( state == 3 ) {
	    if ( cur == '"' ) {
		list[i] = '\0';
		state = 4;
	    }
	} else if ( state == 4 ) {  	/* look for next ';' */
	    if ( cur == ';' ) {
		state = 0;
		table[size].name = name;
		table[size].value = value;
		size++;
		if ( (size+1) >= table_size ) break;
	    }
	} else if ( state == 5 ) {
	    if ( cur == ';' ) {
		list[i] = '\0';
		state = 0;
		table[size].name = name;
		table[size].value = value;
		size++;
		if ( (size+1) >= table_size ) break;
	    } else if ( (cur==' ') || (cur=='\t') ) {
		list[i] = '\0';
	    }
	}
    }
    /*
     * Store attribute still being scanned when the string ended.
     */
    if ( state > 2 ) {
	table[size].name = name;
	table[size].value = value;
	size++;
    } else if ( (state == 1) || (state == 2) ) {
	table[size].name = name;
	table[size].value = "";
	size++;
    }
#ifdef DEBUG
for ( i = 0; i < size; i++ ) printf ( "table[%d]: '%s' = '%s'\n", i,
table[i].name, table[i].value );
#endif
    table[size].name = "";
    return size;
}
/*
 * Search symbol table for a name matching name.
 */
static struct sym_table_def *table_search ( struct 
	sym_table_def *table, char *name )
{
    int i;
    for ( i = 0; table[i].name; i++ ) {
	if ( table[i].name[0] == '\0' ) break;
	if ( strcmp ( table[i].name, name ) == 0 ) return &table[i];
    }
    return (struct sym_table_def *) 0;
}
/**************************************************************************/
/*  Decode the multipart form data.
 *
 *  The symbol MULTIPART=yes is defined first; if the callback returns an
 *  even (failure) status nothing else is done, and a return value of 3
 *  requests the secondary <name>.FILENAME and <name>.CONTENT-TYPE symbols.
 *  The content is then split at the boundary lines and, for every part
 *  whose header has "Content-Disposition: form-data; name=...", a symbol
 *  is defined whose name is the upcased name attribute and whose value is
 *  the part body.
 *
 *  Arguments:
 *	context		Passed unchanged to define_symbol.
 *	clause		Attributes of the content type (boundary=...).
 *			Modified.
 *	content		Request content.  Modified (header lines are
 *			upcased/terminated in place).
 *	content_length	Number of octets in content.
 *	define_symbol	Callback that receives (context, name, name length,
 *			value, value length).
 */
static void parse_multipart_form ( INTEGER context,
	char *clause, char *content, int content_length,
	int define_symbol ( INTEGER, char *, int, char *, int ) )
{
    int i, j, start, bndlen, def_status;
    struct line_set *set;
    struct sym_table_def table[1024], *def;
    char boundary[512], label[512];
    /*
     * Attribute strings are remembered as pointers into the content, not
     * as table entries, because table is reused for every header line.
     */
    char *field_name, *file_name, *part_type;
    /*
     * Search clause string for boundary tag.
     */
    def_status = define_symbol ( context, "MULTIPART", 9, "yes", 3 );
    if ( (def_status&1) == 0 ) return;
    parse_attributes ( clause, table, 1024 );
    def = table_search ( table, "BOUNDARY" );
    if ( !def ) return;
    if ( strlen(def->value) > 500 ) return;	/* must fit boundary[] */
    sprintf ( boundary, "--%s", def->value );
    bndlen = strlen(boundary);
    /*
     * Parse into lines and look for boundary lines.
     */
    set = parse_lines ( content, content_length );
    if ( !set ) return;
    start = 0;
    for ( i = 0; i < set->count; i++ ) {
	struct line_body *cur;
	/*
	 * Skip lines that aren't boundary lines as well as first boundary.
	 */
	cur = &set->line[i];
	if ( cur->length < bndlen ) continue;   /* too short */
	if ( strncmp ( cur->text, boundary, bndlen ) != 0 ) continue;
	if ( i == 0 ) { start = 1; continue; }
	/*
	 * line[start..i-1] is set of lines comprising body, one of which
	 * should be a zero-length line splitting the part header from
	 * the body.  Parse and scan the header lines, looking for the
	 * name clause in the content-disposition header and content type.
	 */
	field_name = file_name = part_type = NULL;
	for ( j = start; j < i; j++ ) {
	    char *hdr;
	    int k, attr_count;
	    if ( set->line[j].length == 0 ) break;   /* end of header */
	    /*
	     * If next line is a continuation line (starts with blank or
	     * tab and still belongs to this part), join it to this one.
	     */
	    if ( ((j+1) < i) && (set->line[j+1].length > 0) &&
		    ((set->line[j+1].text[0] == ' ') ||
		     (set->line[j+1].text[0] == '\t')) ) {
		/*
		 * Replace the line terminator with spaces and
		 * remove the line.
		 */
		for ( k = set->line[j].offset + set->line[j].length;
		    k < set->line[j+1].offset; k++ ) set->data[k] = ' ';
		set->line[j].length = set->line[j+1].offset +
			set->line[j+1].length - set->line[j].offset;
		for ( k = (j+2); k < set->count; k++ ) {
		    set->line[k-1] = set->line[k];
		}
		/*
		 * Adjust the counters.  The boundary line moved down one
		 * slot too, and line j is examined again in case more
		 * continuation lines follow.
		 */
		--set->count;
		--i;
		--j;
		continue;
	    }
	    /*
	     * Parse the header line.  The label is upcased for comparison and
	     * the other attributes are loaded into table of name/value
	     * pairs.  A label too long for label[] cannot be one of the
	     * headers of interest, so scanning just stops there.
	     */
	    hdr = set->line[j].text;
	    for ( attr_count = k = 0; k < set->line[j].length; k++ ) {
		if ( hdr[k] == ':' ) {
		   /* Overwrites the line terminator, which always exists
		    * because the boundary line follows. */
		   hdr[set->line[j].length] = '\0';
		   attr_count = parse_attributes ( &hdr[k+1], table, 1024 );
		   break;
		}
		if ( k >= ((int) sizeof(label) - 1) ) break;
		label[k] = toupper ( (unsigned char) hdr[k] );
	    }
	    label[k] = '\0';
	    /*
	     * deal with specific header types.
	     */
	    if ( attr_count < 1 ) {
		/* Failed to parse anything after label. */
	    } else if ( strcmp ( label, "CONTENT-DISPOSITION" ) == 0 ) {
		/*
		 * If disposition is FORM-DATA, lookup the name attribute.
		 */
		if ( strcmp ( table[0].name, "FORM-DATA" ) == 0 ) {
		    def = table_search ( &table[1], "NAME" );
		    field_name = def ? def->value : NULL;
		    def = table_search ( &table[1], "FILENAME" );
		    file_name = def ? def->value : NULL;
		}
	    } else if ( strcmp ( label, "CONTENT-TYPE" ) == 0 ) {
		/* First attribute "name" is the (upcased) media type. */
		part_type = table[0].name;
	    }
	}
	/*
	 * Add a form table symbol if parse of header found a valid
	 * content-disposition line (FORM-DATA; name="xxx") and the blank
	 * line ending the header (line j) was present.
	 */
	if ( field_name && (j < i) ) {
	    int first, last, k, name_len;
	    char *body;
	    /*
	     * Upcase the name, compute the data size.
	     */
	    for ( k = 0; field_name[k]; k++ ) 
		field_name[k] = toupper ( (unsigned char) field_name[k] );
	    name_len = k;
	    if ( (j+1) < i ) {
		/* Body is line[j+1..i-1], interior terminators included. */
		first = set->line[j+1].offset;
		last = set->line[i-1].offset + set->line[i-1].length;
		body = set->line[j+1].text;
	    } else {
		/* Boundary directly follows the blank line: empty value. */
		first = last = 0;
		body = "";
	    }

	    define_symbol ( context, field_name, name_len,
		body, last - first );
	    /*
	     * Make secondary symbols if requested by callback.  The callback
	     * supplied must make private copies of the key strings.
	     * The length limit keeps the keys inside fnkey[].
	     */
	    if ( (def_status==3) && (name_len < 230) ) {
		char fnkey[256];
	        if ( file_name ) {
		    /* Filename as part of content-disposition header */
		    sprintf ( fnkey, "%s.FILENAME", field_name );
		    define_symbol ( context, fnkey, strlen(fnkey),
			file_name, strlen(file_name)  );
		}
		if ( part_type ) {
		    /* Content-type header */
		    sprintf ( fnkey, "%s.CONTENT-TYPE", field_name );
		    define_symbol ( context, fnkey, strlen(fnkey),
			part_type, strlen(part_type)  );
		}
	    }
	}
	/*
	 * Terminate loop if current boundary line is EOD marker
	 * (boundary tag followed by "--").  The boundary line is looked
	 * up again because joining continuation lines may have moved it.
	 */
	cur = &set->line[i];
	start = i + 1;
	if ( cur->length >= (bndlen + 2) ) {
	     if  ( cur->text[bndlen] == '-' && (cur->text[bndlen+1] == '-') ) {
		break;
	    }
	}
    }
    delete_lineset ( set );
    return;
}
