 /*6  * testHTML.c : a small tester program for HTML input.  *1  * See Copyright for the status of this software.   *  * Daniel.Veillard@w3.org   */    #ifdef WIN32 #include "win32config.h" #else  #include "config.h"  #endif   #include "xmlversion.h"  #ifdef LIBXML_HTML_ENABLED   #include <stdio.h> #include <string.h>  #include <stdarg.h>      #ifdef HAVE_SYS_TYPES_H  #include <sys/types.h> #endif #ifdef HAVE_SYS_STAT_H #include <sys/stat.h>  #endif #ifdef HAVE_FCNTL_H  #include <fcntl.h> #endif #ifdef HAVE_UNISTD_H #include <unistd.h>  #endif #ifdef HAVE_STDLIB_H #include <stdlib.h>  #endif   #include <libxml/xmlmemory.h>  #include <libxml/HTMLparser.h> #include <libxml/HTMLtree.h> #include <libxml/debugXML.h>   #ifdef LIBXML_DEBUG_ENABLED  static int debug = 0;  #endif static int copy = 0; static int sax = 0;  static int repeat = 0; static int noout = 0;  static int push = 0; static char *encoding = NULL;   ' xmlSAXHandler emptySAXHandlerStruct = {      NULL, /* internalSubset */     NULL, /* isStandalone */!     NULL, /* hasInternalSubset */ !     NULL, /* hasExternalSubset */      NULL, /* resolveEntity */      NULL, /* getEntity */      NULL, /* entityDecl */     NULL, /* notationDecl */     NULL, /* attributeDecl */      NULL, /* elementDecl */ "     NULL, /* unparsedEntityDecl */"     NULL, /* setDocumentLocator */     NULL, /* startDocument */      NULL, /* endDocument */      NULL, /* startElement */     NULL, /* endElement */     NULL, /* reference */      NULL, /* characters */#     NULL, /* ignorableWhitespace */ %     NULL, /* processingInstruction */      NULL, /* comment */       NULL, /* xmlParserWarning */     NULL, /* xmlParserError */     NULL, /* xmlParserError */"     NULL, /* getParameterEntity */ };  : xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;( extern xmlSAXHandlerPtr debugSAXHandler;  I /************************************************************************   *									*  *				Debug Handlers				*   *									*J  ************************************************************************/   /**   * isStandaloneDebug:    * @ctxt:  An XML parser context  *'  * Is this document tagged standalone ?   *  * Returns 1 if true  */  int  isStandaloneDebug(void *ctx) { ,     fprintf(stdout, "SAX.isStandalone()\n");     return(0); }    /**   * hasInternalSubsetDebug:   * @ctxt:  An XML parser context  *,  * Does this document has an internal subset  *  * Returns 1 if true  */  int ! hasInternalSubsetDebug(void *ctx)  { 1     fprintf(stdout, "SAX.hasInternalSubset()\n");      return(0); }    /**   * hasExternalSubsetDebug:   * @ctxt:  An XML parser context  *,  * Does this document has an external subset  *  * Returns 1 if true  */  int ! hasExternalSubsetDebug(void *ctx)  { 1     fprintf(stdout, "SAX.hasExternalSubset()\n");      return(0); }    /**   * hasInternalSubsetDebug:   * @ctxt:  An XML parser context  *,  * Does this document has an internal subset  */  void3 internalSubsetDebug(void *ctx, const xmlChar *name, ; 	       const xmlChar *ExternalID, const xmlChar *SystemID)  { 4     fprintf(stdout, "SAX.internalSubset(%s,", name);     if (ExternalID == NULL)  	fprintf(stdout, " ,");      else% 	fprintf(stdout, " %s,", ExternalID);      if (SystemID == NULL)  	fprintf(stdout, " )\n");      else% 	fprintf(stdout, " %s)\n", SystemID);  }    /**   * resolveEntityDebug:   * @ctxt:  An XML parser context)  * @publicId: The public ID of the entity )  * @systemId: The system ID of the entity   *=  * Special entity resolver, better left to the parser, it has +  * more context than the application layer. E  * The default behaviour is to NOT resolve the entities, in that case E  * the ENTITY_REF nodes are built in the structure (and the parameter   * values).   *F  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.  */  xmlParserInputPtr O resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)  { 9     /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */        *     fprintf(stdout, "SAX.resolveEntity(");     if (publicId != NULL) ) 	fprintf(stdout, "%s", (char *)publicId);      else 	fprintf(stdout, " ");     if (systemId != NULL) . 	fprintf(stdout, ", %s)\n", (char *)systemId);     else 	fprintf(stdout, ", )\n");
 /*********     if (systemId != NULL) { =         return(xmlNewInputFromFile(ctxt, (char *) systemId));      }   *********/      return(NULL);  }    /**   * getEntityDebug:   * @ctxt:  An XML parser context  * @name: The entity name   *  * Get an entity by name  *F  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.  */  xmlEntityPtr. getEntityDebug(void *ctx, const xmlChar *name) { 1     fprintf(stdout, "SAX.getEntity(%s)\n", name);      return(NULL);  }    /**   * getParameterEntityDebug:    * @ctxt:  An XML parser context  * @name: The entity name   *!  * Get a parameter entity by name   *   * Returns the xmlParserInputPtr  */  xmlEntityPtr7 getParameterEntityDebug(void *ctx, const xmlChar *name)  { :     fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);     return(NULL);  }      /**   * entityDeclDebug:    * @ctxt:  An XML parser context  * @name:  the entity name    * @type:  the entity type  )  * @publicId: The public ID of the entity )  * @systemId: The system ID of the entity 3  * @content: the entity value (without processing).   *'  * An entity definition has been parsed   */  void9 entityDeclDebug(void *ctx, const xmlChar *name, int type, M           const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)  { ;     fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", 5             name, type, publicId, systemId, content);  }    /**   * attributeDeclDebug:   * @ctxt:  An XML parser context  * @name:  the attribute name   * @type:  the attribute type   **  * An attribute definition has been parsed  */  voidG attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name, =               int type, int def, const xmlChar *defaultValue,  	      xmlEnumerationPtr tree) { C     fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", 1             elem, name, type, def, defaultValue);  }    /**   * elementDeclDebug:   * @ctxt:  An XML parser context  * @name:  the element name   * @type:  the element type 4  * @content: the element value (without processing).  *(  * An element definition has been parsed  */  void: elementDeclDebug(void *ctx, const xmlChar *name, int type," 	    xmlElementContentPtr content) { 5     fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",              name, type); }    /**   * notationDeclDebug:    * @ctxt:  An XML parser context"  * @name: The name of the notation)  * @publicId: The public ID of the entity )  * @systemId: The system ID of the entity   *:  * What to do when a notation declaration has been parsed.  */  void1 notationDeclDebug(void *ctx, const xmlChar *name, 7 	     const xmlChar *publicId, const xmlChar *systemId)  { 5     fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n", A             (char *) name, (char *) publicId, (char *) systemId);  }    /**   * unparsedEntityDeclDebug:    * @ctxt:  An XML parser context   * @name: The name of the entity)  * @publicId: The public ID of the entity )  * @systemId: The system ID of the entity *  * @notationName: the name of the notation  *;  * What to do when an unparsed entity declaration is parsed   */  void7 unparsedEntityDeclDebug(void *ctx, const xmlChar *name, 6 		   const xmlChar *publicId, const xmlChar *systemId,! 		   const xmlChar *notationName)  { ?     fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", @             (char *) name, (char *) publicId, (char *) systemId, 	    (char *) notationName); }    /**   * setDocumentLocatorDebug:    * @ctxt:  An XML parser context  * @loc: A SAX Locator  *I  * Receive the document locator at startup, actually xmlDefaultSAXLocator J  * Everything is available on the context, so this is useless in our case.  */  void8 setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc) { 2     fprintf(stdout, "SAX.setDocumentLocator()\n"); }    /**   * startDocumentDebug:   * @ctxt:  An XML parser context  *2  * called when the document start being processed.  */  void startDocumentDebug(void *ctx)  { -     fprintf(stdout, "SAX.startDocument()\n");  }    /**   * endDocumentDebug:   * @ctxt:  An XML parser context  *2  * called when the document end has been detected.  */  void endDocumentDebug(void *ctx)  { +     fprintf(stdout, "SAX.endDocument()\n");  }    /**   * startElementDebug:    * @ctxt:  An XML parser context  * @name:  The element name   *1  * called when an opening tag has been processed.   */  voidG startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)  { 
     int i;  :     fprintf(stdout, "SAX.startElement(%s", (char *) name);     if (atts != NULL) { +         for (i = 0;(atts[i] != NULL);i++) { ( 	    fprintf(stdout, ", %s", atts[i++]); 	    if (atts[i] != NULL) {  		unsigned char output[40]; % 		const unsigned char *att = atts[i];  		int outlen, attlen;  	        fprintf(stdout, "='"); - 		while ((attlen = strlen((char*)att)) > 0) { ! 		    outlen = sizeof output - 1; > 		    htmlEncodeEntities(output, &outlen, att, &attlen, '\'');. 		    fprintf(stdout, "%.*s", outlen, output); 		    att += attlen; 		}  		fprintf(stdout, "'");  	    } 	}     }      fprintf(stdout, ")\n");  }    /**   * endElementDebug:    * @ctxt:  An XML parser context  * @name:  The element name   *7  * called when the end of an element has been detected.   */  void/ endElementDebug(void *ctx, const xmlChar *name)  { ;     fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);  }    /**   * charactersDebug:    * @ctxt:  An XML parser context  * @ch:  a xmlChar string   * @len: the number of xmlChar  *(  * receiving some chars from the parser.#  * Question: how much at a time ???   */  void6 charactersDebug(void *ctx, const xmlChar *ch, int len) {      unsigned char output[40]; !     int inlen = len, outlen = 30;   7     htmlEncodeEntities(output, &outlen, ch, &inlen, 0);      output[outlen] = 0;   =     fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);  }    /**   * referenceDebug:   * @ctxt:  An XML parser context  * @name:  The entity name  *0  * called when an entity reference is detected.   */  void. referenceDebug(void *ctx, const xmlChar *name) { 1     fprintf(stdout, "SAX.reference(%s)\n", name);  }    /**   * ignorableWhitespaceDebug:   * @ctxt:  An XML parser context  * @ch:  a xmlChar string '  * @start: the first char in the string   * @len: the number of xmlChar  *8  * receiving some ignorable whitespaces from the parser.#  * Question: how much at a time ???   */  void? ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)  {      char output[40];
     int i;  '     for (i = 0;(i<len) && (i < 30);i++)  	output[i] = ch[i];      output[i] = 0;  F     fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len); }    /**   * processingInstructionDebug:   * @ctxt:  An XML parser context  * @target:  the target name  * @data: the PI data's   * @len: the number of xmlChar  *,  * A processing instruction has been parsed.  */  void< processingInstructionDebug(void *ctx, const xmlChar *target,*                       const xmlChar *data) { :     fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",,             (char *) target, (char *) data); }    /**   * commentDebug:   * @ctxt:  An XML parser context  * @value:  the comment content   *  * A comment has been parsed.   */  void- commentDebug(void *ctx, const xmlChar *value)  { 0     fprintf(stdout, "SAX.comment(%s)\n", value); }    /**   * warningDebug:   * @ctxt:  An XML parser context)  * @msg:  the message to display/transmit 2  * @...:  extra parameters for the message display  *H  * Display and format a warning messages, gives file, line, position and  * extra parameters.  */  void- warningDebug(void *ctx, const char *msg, ...)  {      va_list args;        va_start(args, msg);%     fprintf(stdout, "SAX.warning: ");       vfprintf(stdout, msg, args);     va_end(args);  }    /**   * errorDebug:   * @ctxt:  An XML parser context)  * @msg:  the message to display/transmit 2  * @...:  extra parameters for the message display  *F  * Display and format a error messages, gives file, line, position and  * extra parameters.  */  void+ errorDebug(void *ctx, const char *msg, ...)  {      va_list args;        va_start(args, msg);#     fprintf(stdout, "SAX.error: ");       vfprintf(stdout, msg, args);     va_end(args);  }    /**   * fatalErrorDebug:    * @ctxt:  An XML parser context)  * @msg:  the message to display/transmit 2  * @...:  extra parameters for the message display  *K  * Display and format a fatalError messages, gives file, line, position and   * extra parameters.  */  void0 fatalErrorDebug(void *ctx, const char *msg, ...) {      va_list args;        va_start(args, msg);(     fprintf(stdout, "SAX.fatalError: ");      vfprintf(stdout, msg, args);     va_end(args);  }   ' xmlSAXHandler debugSAXHandlerStruct = {      internalSubsetDebug,     isStandaloneDebug,     hasInternalSubsetDebug,      hasExternalSubsetDebug,      resolveEntityDebug,      getEntityDebug,      entityDeclDebug,     notationDeclDebug,     attributeDeclDebug,      elementDeclDebug,      unparsedEntityDeclDebug,     setDocumentLocatorDebug,     startDocumentDebug,      endDocumentDebug,      startElementDebug,     endElementDebug,     referenceDebug,      charactersDebug,     ignorableWhitespaceDebug,      processingInstructionDebug,      commentDebug,      warningDebug,      errorDebug,      fatalErrorDebug,     getParameterEntityDebug, };  : xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;I /************************************************************************   *									*  *				Debug					*   *									*J  ************************************************************************/  # void parseSAXFile(char *filename) {      htmlDocPtr doc = NULL;       /*#      * Empty callbacks for checking       */      if (push) { 	 	FILE *f;    	f = fopen(filename, "r"); 	if (f != NULL) {  	    int res, size = 3;  	    char chars[4096]; 	    htmlParserCtxtPtr ctxt;   	    /* if (repeat) */ 		size = 4096;! 	    res = fread(chars, 1, 4, f);  	    if (res > 0) { 8 		ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,  			    chars, res, filename, 0);0 		while ((res = fread(chars, 1, size, f)) > 0) {* 		    htmlParseChunk(ctxt, chars, res, 0); 		} $ 		htmlParseChunk(ctxt, chars, 0, 1); 		doc = ctxt->myDoc; 		htmlFreeParserCtxt(ctxt);  	    } 	    if (doc != NULL) { : 		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 		xmlFreeDoc(doc); 	    } 	    fclose(f);  	} 	if (!noout) { 	    f = fopen(filename, "r"); 	    if (f != NULL) {  		int res, size = 3; 		char chars[4096];  		htmlParserCtxtPtr ctxt;    		/* if (repeat) */  		    size = 4096; 		res = fread(chars, 1, 4, f); 		if (res > 0) {< 		    ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL, 				chars, res, filename, 0); 4 		    while ((res = fread(chars, 1, size, f)) > 0) {' 			htmlParseChunk(ctxt, chars, res, 0);  		    } ( 		    htmlParseChunk(ctxt, chars, 0, 1); 		    doc = ctxt->myDoc; 		    htmlFreeParserCtxt(ctxt);  		}  		if (doc != NULL) {> 		    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 		    xmlFreeDoc(doc); 		}  		fclose(f); 	    } 	}
     } else {	 ? 	doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);  	if (doc != NULL) { = 	    fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");  	    xmlFreeDoc(doc);  	}   	if (!noout) { 	    /*  	     * Debug callback 	     */C 	    doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);  	    if (doc != NULL) { : 		fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); 		xmlFreeDoc(doc); 	    } 	}     }  }   ( void parseAndPrintFile(char *filename) {     htmlDocPtr doc = NULL, tmp;        /*(      * build an HTML tree from a string;      */      if (push) { 	 	FILE *f;    	f = fopen(filename, "r"); 	if (f != NULL) {  	    int res, size = 3;  	    char chars[4096]; 	    htmlParserCtxtPtr ctxt;   	    /* if (repeat) */ 		size = 4096;! 	    res = fread(chars, 1, 4, f);  	    if (res > 0) { - 		ctxt = htmlCreatePushParserCtxt(NULL, NULL,   			    chars, res, filename, 0);0 		while ((res = fread(chars, 1, size, f)) > 0) {* 		    htmlParseChunk(ctxt, chars, res, 0); 		} $ 		htmlParseChunk(ctxt, chars, 0, 1); 		doc = ctxt->myDoc; 		htmlFreeParserCtxt(ctxt);  	    } 	    fclose(f);  	}
     } else {	 % 	doc = htmlParseFile(filename, NULL);      }      if (doc == NULL) {:         fprintf(stderr, "Could not parse %s\n", filename);     }        /*(      * test intermediate copy if needed.      */      if (copy) {          tmp = doc; 	doc = xmlCopyDoc(doc, 1); 	xmlFreeDoc(tmp);      }        /*      * print it.      */      if (!noout) {  #ifdef LIBXML_DEBUG_ENABLED  	if (!debug) { 	    if (encoding)& 		htmlSaveFileEnc("-", doc, encoding);	 	    else  		htmlDocDump(stdout, doc);  	} else ' 	    xmlDebugDumpDocument(stdout, doc);  #else  	if (encoding)) 	    htmlSaveFileEnc("-", doc, encoding);  	else  	    htmlDocDump(stdout, doc); #endif     }	       /*      * free it.       */      xmlFreeDoc(doc); }   ! int main(int argc, char **argv) {      int i, count;      int files = 0;  !     for (i = 1; i < argc ; i++) {  #ifdef LIBXML_DEBUG_ENABLED C 	if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) 
 	    debug++;  	else  #endifE 	    if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))  	    copy++;F 	else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push"))) 	    push++;D 	else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax"))) 	    sax++; H 	else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
 	    noout++; * 	else if ((!strcmp(argv[i], "-repeat")) ||) 	         (!strcmp(argv[i], "--repeat")))  	    repeat++;* 	else if ((!strcmp(argv[i], "-encode")) ||+ 	         (!strcmp(argv[i], "--encode"))) { 	 	    i++;  	    encoding = argv[i];	         }      } !     for (i = 1; i < argc ; i++) { % 	if ((!strcmp(argv[i], "-encode")) || + 	         (!strcmp(argv[i], "--encode"))) { 	 	    i++;  	    continue;	         }  	if (argv[i][0] != '-') {  	    if (repeat) {0 		for (count = 0;count < 100 * repeat;count++) { 		    if (sax) 			parseSAXFile(argv[i]); 
 		    else     			parseAndPrintFile(argv[i]); 		}     
 	    } else { 
 		if (sax) 		    parseSAXFile(argv[i]);	 		else    ! 		    parseAndPrintFile(argv[i]);  	    } 	    files ++; 	}     }      if (files == 0) { A 	printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",  	       argv[0]); I 	printf("\tParse the HTML files and output the result of the parsing\n");  #ifdef LIBXML_DEBUG_ENABLED E 	printf("\t--debug : dump a debug tree of the in-memory document\n");  #endifF 	printf("\t--copy : used to test the internal copy implementation\n");; 	printf("\t--sax : debug the sequence of SAX callbacks\n"); ? 	printf("\t--repeat : parse the file 100 times, for timing\n"); 1 	printf("\t--noout : do not print the result\n"); 1 	printf("\t--push : use the push mode parser\n"); @ 	printf("\t--encode encoding : output in the given encoding\n");     }      xmlCleanupParser();      xmlMemoryDump();       return(0); }   #else /* !LIBXML_HTML_ENABLED */ #include <stdio.h>! int main(int argc, char **argv) { ;     printf("%s : HTML support not compiled in\n", argv[0]);      return(0); }  #endif