 /*E ** Copyright  1993, 1994 by Eric M. LaFranchi.  All Rights Reserved.  **J ** This software is Copyright 1993, 1994 by Eric M. LaFranchi.  PermissionJ ** to use, copy, and freely redistributed this software in its entirety isC ** hereby granted provided that the above copyright notice and this L ** permission notice are retained.  This software may not be sold for profitF ** or incorporated in commercial software products without the writtenK ** permission of the author.  This software is provided "as is", the author L ** nor his employer make any representation of warranty, express or implied,J ** with respect to any code or other information herein.  In addition, theI ** author disclaim's any liability whatsoever for any use of such code or  ** other information.  **   **+-+  ** ** ANSI89 Compliant  **
 ** Module:
 **	PACK_SUBJECT.C
 **
 ** Abstract:
 **	This module provides the parse_subject function and the static
 **	data tables that support it.  The parse_subject routine works
 **	as follows:
 **
 **	Given a subject line from a NEWS or MAIL article, we look to
 **	see if it's something we might be interested in.  If not, we
 **	return.
 **
 **	Articles containing a subject string of the following forms are
 **	examined carefully.  Whitespace is ignored and the subjects may
 **	have comments before or after the package name.
 **
 **	N-OF-N		(VMS_SHARE type posting)
 **	N of N		(N is any number)
 **	N fo N		(bad typist not using posting software)
 **	N / N
 **	N \ N		(dyslexic poster?)
 **	N | N		(dyslexic poster?)
 **
 **	In addition, ftpmail packages with subjects of the form:
 **		part nnn of package (@source) [subject] (flags)
 **
 **	This module contains four static tables used to help parse the
 **	subject line of an article. In addition to simplifying the code,
 **	these tables can be modified to enhance the ability of this code
 **	to extract packages.
 **
 **	* The part_dividers table is used to locate the place in the
 **	  subject string where the part number identifying the part is to
 **	  be found.
 **
 **	* The delimiter_records structure contains a table of matching
 **	  delimiters that may appear around the part numbers.
 **
 **	* The keyword_eliminators table is a table of words that when
 **	  encountered tells us to ignore the subject line.
 **
 **	* The keyword_extensions table is a table of file extensions that
 **	  are searched for when no part number is found in the subject.
 **
 ** Author: **	Eric M. LaFranchi ** ** Creation Date:  **	20-APR-1993 ** ** Modification History:( **	EML008		Eric M. LaFranchi	12-JAN-1994( **	Added support for separator argument. **( **	EML007		Eric M. LaFranchi	12-JAN-1994A **	Added code to make a better guess at the subject name, and try ) **	to avoid names like V1.3, for example.  **( **	EML006		Eric M. LaFranchi	31-DEC-1993> **	Added code to determine numbers in a part (e.g. 1 / 1) from? **	numbers in a package (e.g. 1 / 1foo.bar) where 1foo.bar is a  **	package name. **( **	EML005		Eric M. LaFranchi	01-DEC-1993? **	Fixed bug that broke the /OVERIDE qualifier in last version.  **( **	EML004		Eric M. LaFranchi	02-NOV-1993= **	Fix subject parsing to search for addition divider objects 2 **	in subject if the first one is not a valid one. **( **	EML003		Eric M. LaFranchi	21-SEP-1993/ **	Added support for ftpmail (DECWRL) subjects.  **( **	EML002		Eric M. LaFranchi	20-SEP-1993F **	Fixed code, so it can find parts at the beginning of subject lines. **( **	EML001		Eric M. LaFranchi	30-JUN-1993A **	Fixed bug which appended a space onto the front of the package  **	name. **E **	Added code to look for packages that didn't include a part number.  ** **-+-  **/    #include <stdio.h> #include <string.h>  #include <stdlib.h>  #include <ctype.h> #include "msgdef.h"  #include "packdef.h"  D #define isfilename( s )		(isalpha(s) || isdigit(s) || (s == '-') ||\, 				 (s == '_') || (s == '$') || (s == '.'))  D #define iswhitespace( s )	((s == ' ') || (s == '\t') || (s == '\v'))     #ifdef VAXC  #pragma nostandard& globalref const unsigned int gblflags; #pragma standard #else # extern const unsigned int gblflags;  #endif  A     /* Table of package part dividers. 
These are single character "      * dividers and word dividers.      */ 1 static char *part_dividers[ ] = { "/", "\\", "|",  				  "-OF-","-of-", 				  "_OF_","_of_", 				  "OF", "FO",  				  "of", "fo",  				  NULL, NULL };   E #define SEPARATOR_LENGTH (sizeof( part_dividers ) / sizeof( char * ))   A     /* Table of package part delimiters. These delimiters are the 0      * beginning and matching ending delimiters.      */ % static const struct delimiter_records  {     const size_t length;     const char *left;    const char *right; > } delimiters[ ] = { { sizeof( "(part " ) - 1, "(part ", ")" },0 		    { sizeof( "[part " ) - 1, "[part ", "]" },0 		    { sizeof( "{part " ) - 1, "{part ", "}" }, 		    { 1, "(", ")" }, 		    { 1, "[", "]" }, 		    { 1, "{", "}" }, 		    { 0, NULL, NULL }, };   =     /* Keywords that eliminate the subject line from parsing.       */ H static const char *const keyword_eliminators[ ] = { "RE:", "Re:", "re:",! 						    "WAS:", "Was:", "was:", ' 						    "REPOST", "Repost", "repost",  						    NULL, NULL };   C     /* Package extensions to search for if no part number is found.       */ B static const char *const keyword_extensions[ ] = { ".gif", ".GIF", 						   ".jpg", ".JPG", 						   ".jpeg", ".JPEG",        						   ".com", ".COM",  						   ".exe", ".EXE", 	       					   ".mpg", ".MPG",  						   ".mpeg", ".MPEG", 						   ".wav", ".WAV", 						   ".lzh", ".LZH", 						   ".tiff", ".TIFF", 						   ".uue", ".UUE", 						   ".uu", ".UU", 						   ".zip", ".ZIP", 						   ".zoo", ".ZOO", 						   NULL, NULL };   /*  *++  * Function:  *	parse_subject  *  * Abstract:9  *	parse_subject( ) breaks a subject line down into three >  *	components: The package name, the part number and the total?  *	number of parts in the package; the remainder is disguarded.   *
  * Inputs:-  *	subject -- pointer to article subject name 1  *	separator -- pointer to user suplied seperator H  *	package -- pointer to a buffer to which the package name gets writtenE  *	part    -- pointer to unsigned longword to receive the part number K  *	expected -- pointer to unsigned longword to receive expected part number E  *	options	-- pointer to unsigned longword to receive subject options   *  * Outputs:   *	part number written  *	total expected parts written   *	package name written   *  * Return Value:  *	Status value   *  * Special Notes: H  *	The package is assumed to be at least as large as the subject buffer.  */  size_t) parse_subject( const char *const subject, $ 	       const char *const separator, 	       char *const package,! 	       unsigned int *const part, % 	       unsigned int *const expected, % 	       unsigned int *const options )  { %     register unsigned int state, tmp;      register char *p, *pt;(     register const char *const *keyword;-     register const char *s, *right_delimiter; 8     register const struct delimiter_records *delimiter;    	/* If null string then return 	 */2     if ( (subject == NULL) || (*subject == '\0') ) 	return ( PACKASM_NOPACKAGE );  < 	/* If the subject is a reply or has another keyword that we 	 * want to ignore, then return  	 */+     if ( !(gblflags & PACKASM_M_OVERRIDE) )      { ; 	for ( keyword = keyword_eliminators; *keyword; keyword++ ) / 	    if ( strstr( subject, *keyword ) != NULL )  		return ( PACKASM_NOPACKAGE );      }   @ 	/* If an additional seperator string was specified by the user," 	 * then add it to the seach list. 	 */6     if ( (separator != NULL) && (*separator != '\0') )9 	part_dividers[SEPARATOR_LENGTH - 2] = (char *)separator;   2 	/* assume normal subject, and set options to zero 	 */     *options = 0;   9 	/* search the subject string for a possible package name  	 */M     for ( keyword = (const char *const *)part_dividers; *keyword; keyword++ 
)      { 
 	s = subject;   2 	while ( (s = strstr( s + 1, *keyword )) != NULL ) 	{ 	    tmp = strlen( *keyword );, 	    if ( strncmp( s, *keyword, tmp ) == 0 ) 	    {9 		    /* get total parts in the package. Continue looking " 		     * if total parts not found.	 		     */  		pt = (char *)s + tmp;  		while ( iswhitespace( *pt ) )  		    pt++;  		if ( isdigit( *pt ) )  		    *expected = atoi( pt );  		else 		    continue;    		while ( isdigit( *pt ) ) 		    pt++;   9 		    /* if '/' character, then assume this is a date and " 		     * continue looking for next	 		     */ $ 		if ( (*s == '/') && (*pt == '/') ) 		{ 
 		    s = pt;  		    continue;  		}   0 		    /* Make sure expected part is real and not  		     * part of a package name.	 		     */  		if ( isfilename( *pt ) ) 		    continue;    		while ( iswhitespace( *pt ) )  		    pt++;    		right_delimiter = pt;   ; 		    /* Get the part number if it exists. Continue looking " 		     * if part number not found.	 		     */  		pt = (char *)s; % 		while ( iswhitespace( *(pt - 1) ) )  		    --pt;   		while ( isdigit( *(pt - 1) ) ) 		    --pt;    		if ( pt != s ) 		    *part = atoi( pt );  		else 		    continue;   ; 		    /* if we're not a the beginning of the subject buffer ; 		     * then check for a delimiter around the part numbers 	 		     */  		if ( pt > subject )  		{ D 		    for ( delimiter = delimiters; delimiter->length; delimiter++ ) 		    {  			p = pt - delimiter->length;? 			if ( strncmp( p, delimiter->left, delimiter->length ) == 0 )  			{5 			    if ( *right_delimiter == *(delimiter->right) )  			    { 				right_delimiter++; 				pt = p; 
 				break; 			    } 			    else  				return ( PACKASM_PARMDEL );  			} 		    }  		}   4 		    /* Copy package name superstring into package.3 		     * Remove all special characters that are not  		     * valid VMS filenames. 	 		     */ 7 		while ( (pt > subject) && iswhitespace( *(pt - 1) ) )  		    --pt;   9 		    /* If part number was at the beginning of the line, 7 		     * search for the subject at the end of the line. 4 		     * Otherwise start looking from the beginning.	 		     */  		if ( pt != subject ) 		    s = subject; 		else 		{  		    s = right_delimiter;% 		    pt = (char *)(s + strlen( s ));  		}    		p = package;+ 		for ( tmp = pt - s; tmp > 0; tmp--, s++ ) 3 		    if ( isfilename( *s ) || iswhitespace( *s ) ) 
 			*p++ = *s;  		*p = '\0';  / 		    /* check for file extension delimiter and & 		     * build file name if it exists.	 		     */ 1 		if ( (s = strchr( p = package, '.' )) != NULL )  		{ 3 			/* scan back from the '.' or end of string until 6 			 * a whitespace character is found or the beginning 			 * of the string is found.  			 */) 		    for ( pt = (char *)s; s != p; --s ) " 			if ( iswhitespace( *(s - 1) ) )
 			    break;   5 			/* scan forward until white space of end of string  			 */ 		    for ( ; *pt; ++pt )  			if ( iswhitespace( *pt ) ) 
 			    break;  		    *pt = '\0';   1 			/* Check for bogus subject, if so find another  			 */ 		    if ( s != p )  		    { ( 			    /* Is subject a version number???
 			     */$ 			if ( (*s == 'V') || (*s == 'v') ) 			{ 			    pt = (char *)s + 1;/ 			    while ( isdigit( *pt ) || (*pt == '.') ) 	 				pt++;    			    if ( (pt - s) >= 4 ) 
 				s = NULL;  			}# 			    /* Does subject contain ".." 
 			     *// 			else if ( (*s == '.') && (*(s + 1) == '.') )  			    s = NULL; 		    }  		}    		if ( s == NULL ) 		{ " 		    while ( iswhitespace( *p ) ) 			p++;  		    s = p;+ 		    if ( (p = strchr( p, ' ' )) != NULL ) 
 			*p = '\0';  		}    		while ( iswhitespace( *s ) )
 		    s++;  / 		    /* Form a package name with only one file  		     * type ('.' extension) 	 		     */ " 		for ( p = strchr( s, '.' ); p; ) 		{ . 		    if ( (pt = strchr( ++p, '.' )) != NULL ) 		    s = p;
 		    p = pt;  		};  %                 strcpy( package, s );    		return ( PACKASM_SUCCESS );  	    } 	}     }    	/* look for ftpmail subject 	 */     *part = 0;     package[0] = '\0';6     sscanf( subject, "part %d of %s", part, package );  /     if ( (*part != 0) && (package[0] != '\0') )      { = 	    /* assume all packages have the maximum number of parts.  	     */ 	*expected = 999;   . 	if ( (strstr( subject, ", last" ) != NULL) ||, 	     (strstr( subject, "(last" ) != NULL) )# 	    *options = PACKASM_M_LASTPART;    	return ( PACKASM_SUCCESS );     }   = 	/* look for a package with no package part number but may be 3 	 * a candidate because of package extensions type.  	 */=     for ( keyword = keyword_extensions; *keyword; keyword++ )      { 1 	if ( (p = strstr( subject, *keyword )) != NULL )  	{$ 	    for ( s = p; s > subject; s-- )! 		if ( iswhitespace( *(s - 1) ) ) 	 			break; ( 	    tmp = (p + strlen( *keyword )) - s;  	    strncpy( package, s, tmp ); 	    package[tmp] = '\0';  	    *expected = *part = 1;     	    return ( PACKASM_SUCCESS ); 	}     }   / 	/* if no package name found in subject, return  	 */!     return ( PACKASM_NOPACKAGE );  } 