/*
 * This module handles parsing request content as form data, building
 * a symbol table of name/value pairs via a caller-supplied callback function.
 *
 * Author:   David Jones
 * Date:     12-FEB-2000
 * Revised:  24-MAR-2001	Define special symbols for .filename attribute.
 * Revised:  24-JAN-2002	Rework for MSTs
 */
#include "mst_share.h"
#include "tutil.h"
#include "tmemory.h"
#include "mst_parse_form.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * Declare local structures:
 *    sym_table_def		table entry for header line attributes
 *    line_set			Tracks logical lines within content.
 *    line_body			Individual line range.
 */
/* One name/value pair taken from a header line's attribute list. */
struct sym_table_def {
    char *name;			/* attribute name */
    char *value;		/* attribute value */
};
/* Location of one logical line inside the content buffer. */
struct line_body {
    char *text;			/* address of the line's first character */
    int length;			/* character count; parse_lines leaves the
				 * line terminator out of this count */
    int offset;			/* index of the first character within the
				 * owning line_set's data buffer */
};
/* Growable list of the logical lines found in a content buffer. */
struct line_set {
    int count;			/* number of lines defined */
    int body_alloc;		/* number of entries allocated for line[] */
    struct line_body *line;	/* array of line descriptors */
    char *data;			/* buffer that the line offsets index into */
};

/*
 * Forward declarations for local routines that are called before the
 * point in the file where they are defined.
 */
static char * unescape_string ( char *string, int *length );
static void parse_multipart_form ( void *context,
	char *clause, char *content, int content_length,
	int define_symbol ( void *, char *, int, char *, int ) );

/****************************************************************************/
/* Main routine for generating symbol table from form data.
 *
 * Arguments:
 *	context		Opaque pointer passed through to define_symbol.
 *	content_type	Content type of the data.  Compared against the
 *			upper-case names "APPLICATION/X-WWW-FORM-URLENCODED"
 *			and "MULTIPART/FORM-DATA".
 *	fdata		Form content.  Modified in place.
 *	fdata_len	Number of bytes of content in fdata.
 *	define_symbol	Callback invoked as (context, name, name length,
 *			value, value length) for every field found.  Name
 *			and value point into fdata and are NOT
 *			null-terminated.
 *
 * Return value:
 *	1	Content type recognized and content scanned.
 *	20	Content type not supported.
 *
 * Note that data pointed to by content_type and fdata may be modified by
 * this function.
 */
int mst_parse_form_content ( void *context, char *content_type,
	char *fdata, int fdata_len,
	int define_symbol ( void *, char *, int, char *, int ) )
{
    /*
     * Examine content type.
     */
    if ( tu_strncmp(content_type, "APPLICATION/X-WWW-FORM-URLENCODED", 33) == 0) {
	int start, i, j, eq, length, namelen, vallen;
	/*
	 * Easy case, parse the data.  First split along '&' and then sub-divide
	 * on the '=' into the name and value.
	 */
	length = fdata_len;
	if ( length < 0 ) length = 0;
	/* Ignore one trailing '&'; guard against empty content. */
	if ( (length > 0) && (fdata[length-1] == '&') ) length = length - 1;
	start = 0;
	for ( i = 0; i <= length; i++ ) {
	    /*
	     * A field ends at '&' or at end of data.  Test the index first
	     * so fdata[length] is never examined.
	     */
	    if ( (i < length) && (fdata[i] != '&') ) continue;
#ifndef KEEP_PLUSES
	    for ( j = start; j < i; j++ ) 
		if ( fdata[j] == '+' ) fdata[j] = ' ';
#endif
	    /*
	     * Find the first '=' in the still-encoded field so that an
	     * escaped "%3D" cannot be mistaken for the delimiter, then
	     * unescape the name and value separately.
	     */
	    for ( eq = start; (eq < i) && (fdata[eq] != '='); eq++ ) ;
	    namelen = eq - start;
	    unescape_string ( &fdata[start], &namelen );
	    for ( j = start; j < (start + namelen); j++ ) {
		/* make field name upcase, with '-' mapped to '_' */
		fdata[j] = toupper ( (unsigned char) fdata[j] );
		if ( fdata[j] == '-' ) fdata[j] = '_';
	    }
	    if ( eq < i ) {
		/*
		 * fdata[start..start+namelen-1] is name,
		 * fdata[eq+1..eq+vallen] is value.  A failure status from
		 * the callback is ignored; remaining fields are still
		 * processed.
		 */
		vallen = i - eq - 1;
		unescape_string ( &fdata[eq+1], &vallen );
		define_symbol ( context, &fdata[start], namelen,
			&fdata[eq+1], vallen );
	    }
	    start = i+1;
	}
    } else if ( tu_strncmp(content_type, "MULTIPART/FORM-DATA", 19 ) == 0 ) {
	char *clause;
	/*
	 * New form type allows sending files,  clause string will
	 * contain attributes (namely the boundary tag).
	 */
	clause = tu_strstr ( content_type, ";" );
	if ( !clause ) clause = ""; else clause++;
	parse_multipart_form ( context, clause, fdata, fdata_len, 
		define_symbol );

    } else {
	return 20;
    }
    return 1;
}
/**************************************************************************/
/*  The following group of routines manage line_set structures.
 */
/*
 * Prepare a line_set for use.
 *
 * Arguments:
 *	set		Structure to initialize.
 *	initial_alloc	Number of line entries to allocate up front; a value
 *			of zero or less allocates nothing.
 *	data		Buffer that the line offsets will refer to.
 *
 * The line array pointer is always left in a defined state: null with
 * body_alloc zero when nothing was (or could be) allocated, so that
 * append_line never hands an indeterminate pointer to tm_realloc.
 */
static void init_lineset ( struct line_set *set, int initial_alloc,
	char *data )
{
    set->line = (struct line_body *) 0;
    set->body_alloc = 0;
    if ( initial_alloc > 0 ) {
	set->line = (struct line_body *) tm_malloc (
		sizeof(struct line_body) * initial_alloc );
	/* Only record the capacity if the allocation succeeded. */
	if ( set->line ) set->body_alloc = initial_alloc;
    }
    set->count = 0;
    set->data = data;
}

static void append_line ( int offset, int length, struct line_set *set )
{
    if ( set->count >= set->body_alloc ) {
	set->body_alloc = set->body_alloc*2 + 100;
	set->line = (struct line_body *) tm_realloc ( set->line,
		sizeof(struct line_body) * set->body_alloc );
    }
    set->line[set->count].text = &set->data[offset];
    set->line[set->count].length = length;
    set->line[set->count].offset = offset;
    set->count++;
}
/*
 * Split a buffer into lines and return a newly allocated line_set that
 * describes them, or null if the line_set itself could not be allocated.
 *
 * A line ends at a linefeed.  When a carriage return immediately precedes
 * the linefeed, that carriage return is left out of the line length.
 * Characters after the last linefeed form a final, unterminated line.
 */
static struct line_set *parse_lines ( char *data, int length )
{
    struct line_set *lines;
    int pos, line_start, after_cr;

    lines = (struct line_set *) tm_malloc ( sizeof(struct line_set) );
    if ( !lines ) return lines;
    init_lineset ( lines, (length/40) + 20, data );

    line_start = 0;
    after_cr = 0;		/* 1 when previous octet was a CR */
    for ( pos = 0; pos < length; pos++ ) {
	char ch = data[pos];
	if ( ch == '\n' ) {
	    /* Drop the CR of a CR/LF pair from the line length. */
	    append_line ( line_start, pos - line_start - after_cr, lines );
	    line_start = pos + 1;
	    after_cr = 0;
	} else {
	    after_cr = (ch == '\r') ? 1 : 0;
	}
    }
    /* Pick up trailing text that has no final delimiter. */
    if ( pos != line_start ) {
	append_line ( line_start, pos - line_start, lines );
    }
    return lines;
}
/*
 * Release a line_set made by parse_lines: the line array (when one was
 * allocated) and then the structure itself.  The buffer the lines refer
 * to is not freed.
 */
static void delete_lineset ( struct line_set *set )
{
    if ( set->body_alloc > 0 ) {
	tm_free ( set->line );
    }
    set->count = 0;
    set->body_alloc = 0;
    tm_free ( set );
}
/**************************************************************************/
/* Return the numeric value of a hexadecimal digit character.  Characters
 * that are not hex digits yield zero.
 */
static int hex_digit_value ( char digit )
{
    if ( (digit >= '0') && (digit <= '9') ) return digit - '0';
    if ( (digit >= 'a') && (digit <= 'f') ) return 10 + (digit - 'a');
    if ( (digit >= 'A') && (digit <= 'F') ) return 10 + (digit - 'A');
    return 0;
}
/* Convert escaped characters (%xx) in string to actual values, in place.
 *
 * Arguments:
 * 	string		Character string.  Modified.
 *	length		Int.  On input, original length of string.
 *			On output, final length of unescaped string.
 *
 * Return value:
 *	The string pointer that was passed in.
 *
 * A '%' that is not followed by two more characters inside the string is
 * copied through unchanged, so nothing beyond string[*length-1] is read.
 */
static char * unescape_string ( char *string, int *length )
{
    int i, reslen, modified;
    /*
     * Scan string.
     */
    for ( modified = reslen = i = 0; i < *length; i++ ) {
	if ( (string[i] == '%') && ((i + 2) < *length) ) {
	    /*
	     * Escape seen, decode next 2 characters as hex and replace
	     * all three with single byte: high nibble first, then low.
	     */
	    int val;
	    val = (hex_digit_value ( string[i+1] ) * 16) +
			hex_digit_value ( string[i+2] );
	    i += 2;
	    if ( val > 127 ) val -= 256;	/* Sign extend */
	    string[reslen] = (char) val;
	    modified = 1;
	} else if ( modified ) {
	    /* Only copy bytes if escape edit took place. */
	    string[reslen] = string[i];
	}
	reslen++;
    }
    /* Return value is pointer to string edited. */
    *length = reslen;
    return string;
}
/**************************************************************************/
/* Scan string for name=value; or name="value"; and build symbol table.
 *
 * Arguments:
 *	list		Null-terminated attribute list.  Modified in place:
 *			names are upcased and delimiters are overwritten
 *			with null characters so that the table entries can
 *			point directly into it.
 *	table		Receives the name/value pairs.  The entry following
 *			the last pair is given an empty name as end marker.
 *	table_size	Number of entries available in table, including the
 *			one used for the end marker.
 *
 * Return value:
 *	Number of name/value pairs stored in table.
 *
 * Parser states:
 *	0  skipping delimiters, looking for start of a name
 *	1  inside a name
 *	2  '=' seen, looking for start of the value
 *	3  inside a quoted value
 *	4  closing quote seen, looking for ';'
 *	5  inside an unquoted value
 */
static int parse_attributes ( char *list, struct sym_table_def *table,
	int table_size )
{
    int i, state, length, size;
    char cur, *name, *value;

    if ( table_size < 2 ) {
	/* No room for a pair plus the end marker. */
	if ( table_size == 1 ) table[0].name = "";
	return 0;
    }
    length = tu_strlen(list);
    state = 0;
    size = 0;
    name = value = "";
    for ( i = 0; i < length; i++ ) {
	cur = list[i];
	if ( state == 0 ) {
	    if ( cur == ';' ) continue;
	    if ( cur == ' ' ) continue;
	    if ( cur == '\t' ) continue;
	    name = &list[i];
	    list[i] = toupper ( (unsigned char) cur );
	    state = 1;			/* look for name end */
	} else if ( state == 1 ) {
	    if ( (cur == ' ') || (cur == '\t') ) {
		list[i] = '\0';
		continue;
	    } else if ( cur == ';' ) {
		/* attribute with no value. */
		state = 0;
		table[size].name = name;
		table[size].value = "";
		list[i] = '\0';
		size++;
		if ( (size+1) >= table_size ) break;
	    } else if ( cur == '=' ) {
		list[i] = '\0';
		value = &list[i+1];
		state = 2;
	    } else {
		list[i] = toupper ( (unsigned char) cur );
	    }
	} else if ( state == 2 ) {
	    if ( (cur == ' ') || (cur == '\t') ) continue;
	    else if ( cur == '"' ) {
		state = 3;
		value = &list[i+1];
	    } else if ( cur == ';' ) {
		/*
		 * "name=;" -- record an empty value instead of letting
		 * the text that follows be taken as this value.
		 */
		list[i] = '\0';
		state = 0;
		table[size].name = name;
		table[size].value = &list[i];
		size++;
		if ( (size+1) >= table_size ) break;
	    } else {
		/* Value begins here, after any blanks following '='. */
		value = &list[i];
		state = 5;
	    }
	} else if ( state == 3 ) {
	    if ( cur == '"' ) {
		list[i] = '\0';
		state = 4;
	    }
	} else if ( state == 4 ) {  	/* look for next ';' */
	    if ( cur == ';' ) {
		state = 0;
		table[size].name = name;
		table[size].value = value;
		size++;
		if ( (size+1) >= table_size ) break;
	    }
	} else if ( state == 5 ) {
	    if ( cur == ';' ) {
		list[i] = '\0';
		state = 0;
		table[size].name = name;
		table[size].value = value;
		size++;
		if ( (size+1) >= table_size ) break;
	    } else if ( (cur==' ') || (cur=='\t') ) {
		list[i] = '\0';
	    }
	}
    }
    /*
     * Record an attribute that was still open when the list ended.
     */
    if ( state > 2 ) {
	table[size].name = name;
	table[size].value = value;
	size++;
    } else if ( state == 2 ) {
	/* List ended right after "name=": empty value. */
	table[size].name = name;
	table[size].value = "";
	size++;
    } else if ( state == 1 ) {
	table[size].name = name;
	table[size].value = "";
	size++;
    }
#ifdef DEBUG
for ( i = 0; i < size; i++ ) printf ( "table[%d]: '%s' = '%s'\n", i,
table[i].name, table[i].value );
#endif
    table[size].name = "";		/* end marker for table_search */
    return size;
}
/*
 * Search symbol table for a name matching name.
 */
static struct sym_table_def *table_search ( struct 
	sym_table_def *table, char *name )
{
    int i;
    for ( i = 0; table[i].name; i++ ) {
	if ( table[i].name[0] == '\0' ) break;
	if ( strcmp ( table[i].name, name ) == 0 ) return &table[i];
    }
    return (struct sym_table_def *) 0;
}
/**************************************************************************/
/*  Decode the multipart form data.
 *
 * Arguments:
 *	context		Opaque pointer passed through to define_symbol.
 *	clause		Attribute portion of the content type (text after
 *			the first ';'), expected to hold the boundary tag.
 *			Modified by the attribute parser.
 *	content		Request content.  Modified in place.
 *	content_length	Number of bytes in content.
 *	define_symbol	Callback invoked as (context, name, name length,
 *			value, value length).
 *
 * The symbol MULTIPART=yes is defined first.  If the callback returns an
 * even status for it, nothing else is done.  If it returns exactly 3, the
 * secondary symbols <name>.FILENAME and <name>.CONTENT-TYPE are defined
 * as well for parts that carry that information.
 *
 * Each part's value points into content and is not null-terminated.
 */
static void parse_multipart_form ( void *context,
	char *clause, char *content, int content_length,
	int define_symbol ( void *, char *, int, char *, int ) )
{
    int i, j, start, bndlen, def_status;
    struct line_set *set;
    struct sym_table_def table[1024], *def;
    char boundary[512], label[512];
    char *part_name, *part_filename, *part_ctype;
    /*
     * Search clause string for boundary tag.
     */
    def_status = define_symbol ( context, "MULTIPART", 9, "yes", 3 );
    if ( (def_status&1) == 0 ) return;
    parse_attributes ( clause, table, 1024 );
    def = table_search ( table, "BOUNDARY" );
    if ( !def ) return;
    if ( def->value[0] == '\0' ) return;	/* empty tag is unusable */
    if ( tu_strlen(def->value) > 500 ) return;
    /*
     * Boundary lines in the content are the tag prefixed by "--".
     */
    boundary[0] = '-';
    boundary[1] = '-';
    tu_strnzcpy ( &boundary[2], def->value, sizeof(boundary)-3 );
    bndlen = tu_strlen(boundary);
    /*
     * Parse into lines and look for boundary lines.
     */
    set = parse_lines ( content, content_length );
    if ( !set ) return;
    start = 0;
    for ( i = 0; i < set->count; i++ ) {
	struct line_body *cur;
	/*
	 * Skip lines that aren't boundary lines as well as first boundary.
	 */
	cur = &set->line[i];
	if ( cur->length < bndlen ) continue;   /* too short */
	if ( tu_strncmp ( cur->text, boundary, bndlen ) != 0 ) continue;
	if ( i == 0 ) { start = 1; continue; }
	/*
	 * line[start..i-1] is set of lines comprising body, one of which
	 * should be a zero-length line splitting the part header from
	 * the body.  Parse and scan the header lines, looking for the
	 * name clause in the content-disposition header and content type.
	 *
	 * The attribute table is reused for every header line, so the
	 * strings of interest (which live in the content buffer) are
	 * saved rather than pointers to table entries.
	 */
	part_name = part_filename = part_ctype = (char *) 0;
	for ( j = start; j < i; j++ ) {
	    char *hdr;
	    int k, attr_count;
	    if ( set->line[j].length == 0 ) break;   /* end of header */
	    /*
	     * If next line is a continuation line (starts with a blank
	     * or tab), fold it into the current line.
	     */
	    if ( ((j+1) < i) && (set->line[j+1].length > 0) &&
			((set->line[j+1].text[0] == ' ') ||
			 (set->line[j+1].text[0] == '\t')) ) {
		/*
		 * Replace the line terminator with spaces and
		 * remove the line.
		 */
		for ( k = set->line[j].offset + set->line[j].length;
		    k < set->line[j+1].offset; k++ ) set->data[k] = ' ';
		set->line[j].length = (set->line[j+1].offset +
			set->line[j+1].length) - set->line[j].offset;
		for ( k = (j+2); k < set->count; k++ ) {
		    set->line[k-1] = set->line[k];
		}
		/*
		 * Adjust the counters.  The boundary line moved down one
		 * slot, so its index and pointer must follow it.  The
		 * current line is re-examined in case another
		 * continuation line follows.
		 */
		--set->count;
		--i;
		cur = &set->line[i];
		--j;
		continue;
	    }
	    /*
	     * Parse the header line.  The label is upcased for comparison and
	     * the other attributes are loaded into table of name/value
	     * pairs.  A label too long for the label buffer is abandoned,
	     * which leaves attr_count zero so the line is ignored.
	     */
	    hdr = set->line[j].text;
	    for ( attr_count = k = 0; k < set->line[j].length; k++ ) {
		if ( hdr[k] == ':' ) {
		   /* Overwrite line terminator to null-terminate line. */
		   hdr[set->line[j].length] = '\0';
		   attr_count = parse_attributes ( &hdr[k+1], table, 1024 );
		   break;
		}
		if ( k >= ((int) sizeof(label) - 1) ) break;
		label[k] = toupper ( (unsigned char) hdr[k] );
	    }
	    label[k] = '\0';
	    /*
	     * deal with specific header types.
	     */
	    if ( attr_count < 1 ) {
		/* Failed to parse anything after label. */
	    } else if ( tu_strncmp ( label, "CONTENT-DISPOSITION", 20 ) == 0 ) {
		/*
		 * If disposition is FORM-DATA, lookup the name attribute.
		 */
		if ( tu_strncmp ( table[0].name, "FORM-DATA", 20 ) == 0 ) {
		    def = table_search ( &table[1], "NAME" );
		    part_name = def ? def->value : (char *) 0;
		    def = table_search ( &table[1], "FILENAME" );
		    part_filename = def ? def->value : (char *) 0;
		}
	    } else if ( tu_strncmp ( label, "CONTENT-TYPE", 20 ) == 0 ) {
		part_ctype = table[0].name;
	    }
	}
	/*
	 * Add a form table symbol if parse of header found a valid
	 * content-disposition line (FORM-DATA; name="xxx") and the blank
	 * line that ends the header (j < i).
	 */
	if ( part_name && (j < i) ) {
	    int first, last, k, length;
	    char *body;
	    /*
	     * Upcase the name, compute the data size.
	     */
	    for ( k = 0; part_name[k]; k++ ) 
		part_name[k] = toupper ( (unsigned char) part_name[k] );
	    if ( (j+1) < i ) {
		body = set->line[j+1].text;
		first = set->line[j+1].offset;
		last = set->line[i-1].offset + set->line[i-1].length;
	    } else {
		/* Boundary directly follows the blank line: empty body. */
		body = set->line[j].text;
		first = last = set->line[j].offset;
	    }
	    length = tu_strlen ( part_name );

	    define_symbol ( context, part_name, length, body, last - first );
	    /*
	     * Make secondary symbols if requested by callback.  The callback
	     * supplied must make private copies of the key strings.
	     */
	    if ( (def_status==3) && (length < 230) ) {
		char fnkey[256];
		tu_strcpy ( fnkey, part_name );
	        if ( part_filename ) {
		    /* Filename as part of content-disposition header */
		    tu_strcpy ( &fnkey[length], ".FILENAME" );
		    define_symbol ( context, fnkey, tu_strlen(fnkey),
			part_filename, tu_strlen(part_filename)  );
		}
		if ( part_ctype ) {
		    /* Content-type header */
		    tu_strcpy ( &fnkey[length], ".CONTENT-TYPE" );
		    define_symbol ( context, fnkey, tu_strlen(fnkey),
			part_ctype, tu_strlen(part_ctype)  );
		}
	    }
	}
	/*
	 * Terminate loop if current boundary line is EOD marker
	 * (boundary tag followed by "--").
	 */
	start = i + 1;
	if ( cur->length >= (bndlen + 2) ) {
	    if ( (cur->text[bndlen] == '-') && (cur->text[bndlen+1] == '-') ) {
		break;
	    }
	}
    }
    delete_lineset ( set );
    return;
}
/*****************************************************************************/
/* Symbol definition callback used when loading form data.  A symbol
 * block is allocated, filled in, and pushed on the front of the form's
 * symbol list.
 *
 * Arguments:
 *	form_vp		Pointer to the struct mst_form_values being built.
 *	sym, symlen	Symbol name and its length; copied into the block.
 *	value, vallen	Symbol value and its length; only the pointer is
 *			stored, the value bytes are not copied.
 *
 * Return value:
 *	1	Symbol added.
 *	0	Allocation failure.
 */
static int add_to_env ( void *form_vp, char *sym,
	int symlen, char *value, int vallen )
{
    struct mst_form_values *owner;
    struct mst_form_symbol *entry;

    owner = (struct mst_form_values *) form_vp;
    /*
     * Block is sized for the structure plus symlen bytes of name.
     */
    entry = tm_malloc ( sizeof(struct mst_form_symbol) + symlen );
    if ( !entry ) {
	return 0;			/* allocation failure */
    }
    tu_strnzcpy ( entry->name, sym, symlen );
    entry->value = value;
    entry->vallen = vallen;
    /*
     * Link at head of list.
     */
    entry->next = owner->symbol;
    owner->symbol = entry;
    return 1;
}

/*
 * Read the request content (or, for GET requests, the query string) and
 * parse it into a list of form symbols.
 *
 * Arguments:
 *	cnx		Link to the server, used to read the content.
 *	content_limit	Largest CONTENT_LENGTH that will be accepted.
 *	env		Environment buffer supplying CONTENT_TYPE,
 *			CONTENT_LENGTH, QUERY_STRING and the request prolog.
 *	form_info	Receives the form structure, or null on failure.
 *			Release it with mst_free_form_data.
 *
 * Return value:
 *	1	Form structure returned in *form_info.  (The status of the
 *		content parse itself is not reflected in the return value.)
 *	0	Content length negative or over the limit, allocation
 *		failure, or missing request prolog.
 *	other	Even status from a failed mst_write or mst_read.
 *
 * Whenever the return value is not 1, anything allocated here has been
 * freed and *form_info is null.
 */
int mst_load_form_data ( mst_link_t cnx, 
	int content_limit,		/* max content_length allowed */
	struct mstshr_envbuf *env,
	struct mst_form_values **form_info )
{
    char *clen_str, *ctype_str, content_type[640];
    int content_length, status, written, k, in_type;
    struct mst_form_values *form;
    /*
     * Initialize form data.
     */
    tm_initialize();
    *form_info = (struct mst_form_values *) 0;
    /*
     * get content type and length and read content.
     */
    ctype_str = mstshr_getenv ( "CONTENT_TYPE", env );
    clen_str = mstshr_getenv ( "CONTENT_LENGTH", env );
    if ( clen_str ) content_length = atoi ( clen_str ); else content_length=0;
    if ( content_length < 0 ) return 0;		/* malformed length */

    if ( !ctype_str ) ctype_str = "";
    /*
     * Make a private copy of the content type with the type/subtype
     * portion (text before the first ';') upcased for comparison.  The
     * attributes after the ';' are left untouched because the multipart
     * boundary value must match the content exactly; attribute names
     * are upcased later by the attribute parser.
     */
    in_type = 1;
    for ( k = 0; ctype_str[k] && (k < ((int) sizeof(content_type) - 1));
		k++ ) {
	if ( ctype_str[k] == ';' ) in_type = 0;
	content_type[k] = in_type ?
		(char) toupper ( (unsigned char) ctype_str[k] ) : ctype_str[k];
    }
    content_type[k] = '\0';

    if ( content_length > content_limit ) {
	/*
	 * Too much content in request to reasonably handle, abort.
	 */
	return 0;
    }

    form = (struct mst_form_values *) tm_malloc ( 
	sizeof(struct mst_form_values) + content_length + 256 );
    if ( !form ) return 0;		/* allocation failure */
    form->symbol = (struct mst_form_symbol *) 0;
    form->content_length = 0;

    if ( content_length > 0 ) {
	int i, bytes;
	/*
	 * Read all content, note that we allocated an extra amount so
	 * we can always do full reads from the server.
	 */
	for ( i = 0; i < content_length; i += bytes ) {
	    bytes = 0;
	    status = mst_write ( cnx, "<DNETINPUT>", 11, &written );
	    if ( (status&1) == 0 ) { tm_free ( form ); return status; }
	    status = mst_read ( cnx, &form->fdata[i], 256, &bytes );
	    if ( (status&1) == 0 ) { tm_free ( form ); return status; }
	    if ( bytes <= 0 ) break;	/* no progress, avoid endless loop */
	}
	/*
	 * Only data actually received and within the declared length is
	 * treated as content.
	 */
	if ( i < content_length ) content_length = i;
	form->content_length = content_length;
    }
    /*
     * Check the request method (first prolog element).
     */
    if ( !env->prolog[0] ) { tm_free ( form ); return 0; }
    if ( tu_strncmp ( env->prolog[0], "GET", 4 ) == 0 ) {
	/*
	 * Form data is in query string.
	 */
	char *qstring;
	qstring = mstshr_getenv ( "QUERY_STRING", env );
	if ( qstring ) {
	    struct mst_form_values *resized;
	    content_length = tu_strlen ( qstring );
	    /*
	     * Keep the old block until the resize is known to have
	     * worked; one spare byte holds a terminating null.
	     */
	    resized = tm_realloc ( form, sizeof(struct mst_form_values) + 
		content_length + 1 );
	    if ( !resized ) { tm_free ( form ); return 0; }
	    form = resized;
	    tu_strcpy ( content_type, "APPLICATION/X-WWW-FORM-URLENCODED" );
	    memcpy ( form->fdata, qstring, content_length );
	    form->fdata[content_length] = '\0';
	    form->content_length = content_length;
	}
    }
    *form_info = form;
    /*
     * Convert the data.
     */
    status = mst_parse_form_content ( form, content_type, form->fdata,
		content_length, add_to_env );
    return 1;
}

/*
 * Release a form structure produced by mst_load_form_data: every symbol
 * block on its list, then the structure (which also holds the content).
 * A null form_info is accepted and ignored.
 */
void mst_free_form_data ( struct mst_form_values *form_info )
{
    struct mst_form_symbol *cur;

    if ( form_info ) {
	/*
	 * Walk the list, fetching each successor before its block is
	 * freed.
	 */
	cur = form_info->symbol;
	while ( cur ) {
	    struct mst_form_symbol *following = cur->next;
	    tm_free ( cur );
	    cur = following;
	}
	form_info->symbol = (struct mst_form_symbol *) 0;
	/*
	 * Free the content storage.
	 */
	tm_free ( form_info );
    }
}
/*
 * Find a form symbol by name.
 *
 * The comparison covers the length of name plus one, so the terminating
 * null takes part in the match.  Returns the first matching symbol on
 * the form's list, or null if no symbol matches.
 */
struct mst_form_symbol *mst_get_form_symbol ( const char *name, 
	struct mst_form_values *form_info )
{
    struct mst_form_symbol *entry;
    int cmplen;

    cmplen = tu_strlen ( name ) + 1;

    entry = form_info->symbol;
    while ( entry ) {
	if ( tu_strncmp ( name, entry->name, cmplen ) == 0 ) return entry;
	entry = entry->next;
    }
    return entry;
}
