 /*K  * encoding.c : implements the encoding conversion functions needed for XML   *  * Related specs: A  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies J  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau-  * [ISO-10646]    UTF-8 and UTF-16 in Annexes /  * [ISO-8859-1]   ISO Latin-1 characters codes. B  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --H  *                Worldwide Character Encoding -- Version 1.0", Addison-C  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is ;  *                described in Unicode Technical Report #4. G  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for :  *                Information Interchange, ANSI X3.4-1986.  *O  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>   *1  * See Copyright for the status of this software.   *  * Daniel.Veillard@w3.org   */    #ifdef WIN32 #include "win32config.h" #else  #include "config.h"  #endif   #include <stdio.h> #include <string.h>    #ifdef HAVE_CTYPE_H  #include <ctype.h> #endif #ifdef HAVE_STDLIB_H #include <stdlib.h>  #endif #include <libxml/xmlversion.h> #ifdef LIBXML_ICONV_ENABLED  #ifdef HAVE_ERRNO_H  #include <errno.h> #endif #endif #include <libxml/encoding.h> #include <libxml/xmlmemory.h>  #ifdef LIBXML_HTML_ENABLED #include <libxml/HTMLparser.h> #endif  3 xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 3 xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;   : typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;6 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; struct _xmlCharEncodingAlias {     const char *name;      const char *alias; };  = static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; ( static int xmlCharEncodingAliasesNb = 0;) static int xmlCharEncodingAliasesMax = 0;    #ifdef LIBXML_ICONV_ENABLED  #if 0 @ #define DEBUG_ENCODING  /* Define this to get encoding traces */ #endif #endif   static int xmlLittleEndian = 1;    /*9  * From rfc2044: encoding of the Unicode values on UTF-8:   *=  * UCS-4 range (hex.)           UTF-8 octet sequence (binary) !  * 0000 0000-0000 007F   0xxxxxxx *  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx4  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx   *5  * I hope we won't use values > 0xFFFF anytime soon !   */    /**   * xmlGetUTF8Char:+  * @utf:  a sequence of UTF-8 encoded bytes !  * @len:  a pointer to @bytes len   *  * Read one UTF8 Char from @utf   *I  * Returns the char value or -1 in case of error and update @len with the   *        number of bytes used  */  int 4 xmlGetUTF8Char(const unsigned char *utf, int *len) {     unsigned int c;        if (utf == NULL) 	goto error;     if (len == NULL) 	goto error;     if (*len < 1)  	goto error;       c = utf[0];      if (c & 0x80) {  	if (*len < 2) 	    goto error; 	if ((utf[1] & 0xc0) != 0x80)  	    goto error; 	if ((c & 0xe0) == 0xe0) { 	    if (*len < 3)
 		goto error; ! 	    if ((utf[2] & 0xc0) != 0x80) 
 		goto error;  	    if ((c & 0xf0) == 0xf0) { 		if (*len < 4)  		    goto error; 4 		if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80) 		    goto error;  		*len = 4;  		/* 4-byte code */  		c = (utf[0] & 0x7) << 18;  		c |= (utf[1] & 0x3f) << 12;  		c |= (utf[2] & 0x3f) << 6; 		c |= utf[3] & 0x3f; 
 	    } else {  	      /* 3-byte code */ 		*len = 3;  		c = (utf[0] & 0xf) << 12;  		c |= (utf[1] & 0x3f) << 6; 		c |= utf[2] & 0x3f;  	    }	 	} else {  	  /* 2-byte code */ 	    *len = 2; 	    c = (utf[0] & 0x1f) << 6; 	    c |= utf[1] & 0x3f; 	}     } else { 	/* 1-byte code */
 	*len = 1;     }      return(c);   error:
     *len = 0;      return(-1);  }    /** 1  * xmlCheckUTF8: Check utf-8 string for legality. 2  * @utf: Pointer to putative utf-8 encoded string.  *;  * Checks @utf for being valid utf-8. @utf is assumed to be A  * null-terminated. This function is not super-strict, as it will A  * allow longer utf-8 sequences than necessary. Note that Java is D  * capable of producing these sequences if provoked. Also note, thisF  * routine checks for the 4-byte maxiumum size, but does not check for  * 0x10ffff maximum value.  *'  * Return value: true if @utf is valid.   **/ int & xmlCheckUTF8(const unsigned char *utf) {      int ix;      unsigned char c;  "     for (ix = 0; (c = utf[ix]);) {         if (c & 0x80) { & 	    if ((utf[ix + 1] & 0xc0) != 0x80) 	        return(0);  	    if ((c & 0xe0) == 0xe0) {* 	        if ((utf[ix + 2] & 0xc0) != 0x80) 		    return(0);" 	        if ((c & 0xf0) == 0xf0) {= 		    if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)  		        return(0); 		    ix += 4; 		    /* 4-byte code */  	        } else  		  /* 3-byte code */  		    ix += 3; 	    } else  	      /* 2-byte code */ 	        ix += 2;  	} else  	    /* 1-byte code */
 	    ix++;       }        return(1); }    /**   * asciiToUTF8: <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out -  * @in:  a pointer to an array of ASCII chars   * @inlen:  the length of @in   *C  * Take a block of ASCII chars in and try to convert it to an UTF-8   * block of chars out.(  * Returns 0 if success, or -1 otherwiseD  * The value of @inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.F  * The value of @outlen after return is the number of ocetes consumed.  */  int , asciiToUTF8(unsigned char* out, int *outlen,4               const unsigned char* in, int *inlen) {"     unsigned char* outstart = out;#     const unsigned char* base = in; (     const unsigned char* processed = in;*     unsigned char* outend = out + *outlen;     const unsigned char* inend;      unsigned int c; 
     int bits;        inend = in + (*inlen);<     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 	c= *in++;  + 	/* assertion: c is a single UTF-4 value */          if (out >= outend) 	    break; G         if      (c <    0x80) {  *out++=  c;                bits= -6; }          else {   	    *outlen = out - outstart; 	    *inlen = processed - base;  	    return(-1); 	}   &         for ( ; bits >= 0; bits-= 6) {             if (out >= outend) 	        break; 0             *out++= ((c >> bits) & 0x3F) | 0x80;	         } ' 	processed = (const unsigned char*) in;      }      *outlen = out - outstart;      *inlen = processed - base;     return(0); }    /**   * UTF8Toascii: <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out -  * @in:  a pointer to an array of UTF-8 chars   * @inlen:  the length of @in   *C  * Take a block of UTF-8 chars in and try to convert it to an ASCII   * block of chars out.  *E  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise D  * The value of @inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.F  * The value of @outlen after return is the number of ocetes consumed.  */  int , UTF8Toascii(unsigned char* out, int *outlen,4               const unsigned char* in, int *inlen) {(     const unsigned char* processed = in;      const unsigned char* outend;(     const unsigned char* outstart = out;&     const unsigned char* instart = in;     const unsigned char* inend;      unsigned int c, d;     int trailing;        if (in == NULL) { 
         /*  	 * initialization nothing to do 	 */
 	*outlen = 0;  	*inlen = 0; 	return(0);      }      inend = in + (*inlen);     outend = out + (*outlen);      while (in < inend) { 	d = *in++; + 	if      (d < 0x80)  { c= d; trailing= 0; }  	else if (d < 0xC0) { , 	    /* trailing byte in leading position */ 	    *outlen = out - outstart;" 	    *inlen = processed - instart; 	    return(-2);;         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; } 9         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; } 9         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }  	else { & 	    /* no chance for this in Ascii */ 	    *outlen = out - outstart;" 	    *inlen = processed - instart; 	    return(-2); 	}   	if (inend - in < trailing) {  	    break;  	}      	for ( ; trailing; trailing--) {8 	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 		break;
 	    c <<= 6;  	    c |= d & 0x3F;  	}  + 	/* assertion: c is a single UTF-4 value */  	if (c < 0x80) { 	    if (out >= outend)  		break; 	    *out++ = c;	 	} else { & 	    /* no chance for this in Ascii */ 	    *outlen = out - outstart;" 	    *inlen = processed - instart; 	    return(-2); 	} 	processed = in;     }      *outlen = out - outstart; !     *inlen = processed - instart;      return(0); }    /**   * isolat1ToUTF8: <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out 3  * @in:  a pointer to an array of ISO Latin 1 chars   * @inlen:  the length of @in   *I  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8   * block of chars out.(  * Returns 0 if success, or -1 otherwiseD  * The value of @inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.F  * The value of @outlen after return is the number of ocetes consumed.  */  int . isolat1ToUTF8(unsigned char* out, int *outlen,4               const unsigned char* in, int *inlen) {"     unsigned char* outstart = out;#     const unsigned char* base = in; (     const unsigned char* processed = in;*     unsigned char* outend = out + *outlen;     const unsigned char* inend;      unsigned int c; 
     int bits;        inend = in + (*inlen);<     while ((in < inend) && (out - outstart + 5 < *outlen)) {
 	c= *in++;  + 	/* assertion: c is a single UTF-4 value */          if (out >= outend) 	    break; G         if      (c <    0x80) {  *out++=  c;                bits= -6; } P         else                  {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }   &         for ( ; bits >= 0; bits-= 6) {             if (out >= outend) 	        break; 0             *out++= ((c >> bits) & 0x3F) | 0x80;	         } ' 	processed = (const unsigned char*) in;      }      *outlen = out - outstart;      *inlen = processed - base;     return(0); }    /**   * UTF8Toisolat1: <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out -  * @in:  a pointer to an array of UTF-8 chars   * @inlen:  the length of @in   *I  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1   * block of chars out.  *E  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise D  * The value of @inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.F  * The value of @outlen after return is the number of ocetes consumed.  */  int . UTF8Toisolat1(unsigned char* out, int *outlen,4               const unsigned char* in, int *inlen) {(     const unsigned char* processed = in;      const unsigned char* outend;(     const unsigned char* outstart = out;&     const unsigned char* instart = in;     const unsigned char* inend;      unsigned int c, d;     int trailing;        if (in == NULL) { 
         /*  	 * initialization nothing to do 	 */
 	*outlen = 0;  	*inlen = 0; 	return(0);      }      inend = in + (*inlen);     outend = out + (*outlen);      while (in < inend) { 	d = *in++; + 	if      (d < 0x80)  { c= d; trailing= 0; }  	else if (d < 0xC0) { , 	    /* trailing byte in leading position */ 	    *outlen = out - outstart;" 	    *inlen = processed - instart; 	    return(-2);;         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; } 9         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; } 9         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }  	else { ( 	    /* no chance for this in IsoLat1 */ 	    *outlen = out - outstart;" 	    *inlen = processed - instart; 	    return(-2); 	}   	if (inend - in < trailing) {  	    break;  	}      	for ( ; trailing; trailing--) { 	    if (in >= inend)  		break;' 	    if (((d= *in++) & 0xC0) != 0x80) {  		*outlen = out - outstart;  		*inlen = processed - instart; 
 		return(-2);  	    }
 	    c <<= 6;  	    c |= d & 0x3F;  	}  + 	/* assertion: c is a single UTF-4 value */  	if (c <= 0xFF) {  	    if (out >= outend)  		break; 	    *out++ = c;	 	} else { ( 	    /* no chance for this in IsoLat1 */ 	    *outlen = out - outstart;" 	    *inlen = processed - instart; 	    return(-2); 	} 	processed = in;     }      *outlen = out - outstart; !     *inlen = processed - instart;      return(0); }    /**   * UTF16LEToUTF8: <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out B  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array0  * @inlenb:  the length of @in in UTF-16LE chars  *H  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8@  * block of chars out. This function assume the endian properity>  * is the same between the native type of this machine and the  * inputed one.   *D  * Returns the number of byte written, or -1 by lack of space, or -2C  *     if the transcoding fails (for *in is not valid utf16 string) H  *     The value of *inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.  */  int . UTF16LEToUTF8(unsigned char* out, int *outlen,2             const unsigned char* inb, int *inlenb) { "     unsigned char* outstart = out;)     const unsigned char* processed = inb; *     unsigned char* outend = out + *outlen;/     unsigned short* in = (unsigned short*) inb;      unsigned short* inend;     unsigned int c, d, inlen;      unsigned char *tmp; 
     int bits;        if ((*inlenb % 2) == 1)          (*inlenb)--;     inlen = *inlenb / 2;     inend = in + inlen; <     while ((in < inend) && (out - outstart + 5 < *outlen)) {         if (xmlLittleEndian) { 	    c= *in++;	 	} else {   	    tmp = (unsigned char *) in; 	    c = *tmp++;) 	    c = c | (((unsigned int)*tmp) << 8); 
 	    in++; 	}9         if ((c & 0xFC00) == 0xD800) {    /* surrogates */ F 	    if (in >= inend) {           /* (in > inend) shouldn't happens */ 		break; 	    } 	    if (xmlLittleEndian) {  		d = *in++;
 	    } else {  		tmp = (unsigned char *) in; 
 		d = *tmp++; & 		d = d | (((unsigned int)*tmp) << 8); 		in++;  	    })             if ((d & 0xFC00) == 0xDC00) {                  c &= 0x03FF;                 c <<= 10;                   c |= d & 0x03FF;                 c += 0x10000; 
             }              else { 		*outlen = out - outstart;  		*inlenb = processed - inb; 	        return(-2); 	    }	         }   + 	/* assertion: c is a single UTF-4 value */          if (out >= outend) 	    break; G         if      (c <    0x80) {  *out++=  c;                bits= -6; } P         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }P         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }P         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }   &         for ( ; bits >= 0; bits-= 6) {             if (out >= outend) 	        break; 0             *out++= ((c >> bits) & 0x3F) | 0x80;	         } ' 	processed = (const unsigned char*) in;      }      *outlen = out - outstart;      *inlenb = processed - inb;     return(0); }    /**   * UTF8ToUTF16LE: =  * @outb:  a pointer to an array of bytes to store the result    * @outlen:  the length of @outb-  * @in:  a pointer to an array of UTF-8 chars   * @inlen:  the length of @in   *F  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE  * block of chars out.  *D  * Returns the number of byte written, or -1 by lack of space, or -2"  *     if the transcoding failed.   */  int / UTF8ToUTF16LE(unsigned char* outb, int *outlen, 0             const unsigned char* in, int *inlen) { 1     unsigned short* out = (unsigned short*) outb; (     const unsigned char* processed = in;"     unsigned short* outstart= out;     unsigned short* outend; *     const unsigned char* inend= in+*inlen;     unsigned int c, d;     int trailing;      unsigned char *tmp;      unsigned short tmp1, tmp2;       if (in == NULL) { 
         /*+ 	 * initialization, add the Byte Order Mark  	 */         if (*outlen >= 2) {  	    outb[0] = 0xFF; 	    outb[1] = 0xFE; 	    *outlen = 2;  	    *inlen = 0; #ifdef DEBUG_ENCODING <             fprintf(stderr, "Added FFFE Byte Order Mark\n"); #endif 	    return(2);  	}
 	*outlen = 0;  	*inlen = 0; 	return(0);      } !     outend = out + (*outlen / 2);      while (in < inend) {       d= *in++; 0       if      (d < 0x80)  { c= d; trailing= 0; }       else if (d < 0xC0) {1           /* trailing byte in leading position */ " 	  *outlen = (out - outstart) * 2; 	  *inlen = processed - in;  	  return(-2);9       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; } 7       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; } 7       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }        else {# 	/* no chance for this in UTF-16 */   	*outlen = (out - outstart) * 2; 	*inlen = processed - in;  	return(-2);       }   "       if (inend - in < trailing) {           break;       }   %       for ( ; trailing; trailing--) { =           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 
 	      break;            c <<= 6;           c |= d & 0x3F;       }   0       /* assertion: c is a single UTF-4 value */         if (c < 0x10000) {             if (out >= outend) 	        break;  	    if (xmlLittleEndian) { 
 		*out++ = c; 
 	    } else {  		tmp = (unsigned char *) out; 		*tmp = c ; 		*(tmp + 1) = c >> 8 ;  		out++; 	    }	         }           else if (c < 0x110000) {              if (out+1 >= outend) 	        break;              c -= 0x10000;  	    if (xmlLittleEndian) {  		*out++ = 0xD800 | (c >> 10);! 		*out++ = 0xDC00 | (c & 0x03FF); 
 	    } else {  		tmp1 = 0xD800 | (c >> 10); 		tmp = (unsigned char *) out; 		*tmp = (unsigned char) tmp1; 		*(tmp + 1) = tmp1 >> 8;  		out++;   		tmp2 = 0xDC00 | (c & 0x03FF);  		tmp = (unsigned char *) out; 		*tmp  = (unsigned char) tmp2;  		*(tmp + 1) = tmp2 >> 8;  		out++; 	    }	         }          else 	    break;  	processed = in;     } #     *outlen = (out - outstart) * 2;      *inlen = processed - in;     return(0); }    /**   * UTF16BEToUTF8: <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out @  * @inb:  a pointer to an array of UTF-16 passwd as a byte array.  * @inlenb:  the length of @in in UTF-16 chars  *F  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8@  * block of chars out. This function assume the endian properity>  * is the same between the native type of this machine and the  * inputed one.   *D  * Returns the number of byte written, or -1 by lack of space, or -2C  *     if the transcoding fails (for *in is not valid utf16 string) D  * The value of *inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.  */  int . UTF16BEToUTF8(unsigned char* out, int *outlen,2             const unsigned char* inb, int *inlenb) { "     unsigned char* outstart = out;)     const unsigned char* processed = inb; *     unsigned char* outend = out + *outlen;/     unsigned short* in = (unsigned short*) inb;      unsigned short* inend;     unsigned int c, d, inlen;      unsigned char *tmp; 
     int bits;        if ((*inlenb % 2) == 1)          (*inlenb)--;     inlen = *inlenb / 2;     inend= in + inlen;     while (in < inend) { 	if (xmlLittleEndian) {   	    tmp = (unsigned char *) in; 	    c = *tmp++; 	    c = c << 8;! 	    c = c | (unsigned int) *tmp; 
 	    in++;	 	} else {  	    c= *in++; 	}  9         if ((c & 0xFC00) == 0xD800) {    /* surrogates */ F 	    if (in >= inend) {           /* (in > inend) shouldn't happens */ 		*outlen = out - outstart;  		*inlenb = processed - inb; 	        return(-2); 	    } 	    if (xmlLittleEndian) {  		tmp = (unsigned char *) in; 
 		d = *tmp++; 
 		d = d << 8;  		d = d | (unsigned int) *tmp; 		in++; 
 	    } else {  		d= *in++;  	    })             if ((d & 0xFC00) == 0xDC00) {                  c &= 0x03FF;                 c <<= 10;                   c |= d & 0x03FF;                 c += 0x10000; 
             }              else { 		*outlen = out - outstart;  		*inlenb = processed - inb; 	        return(-2); 	    }	         }   + 	/* assertion: c is a single UTF-4 value */          if (out >= outend)   	    break; G         if      (c <    0x80) {  *out++=  c;                bits= -6; } P         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }P         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }P         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }   &         for ( ; bits >= 0; bits-= 6) {             if (out >= outend)   	        break; 0             *out++= ((c >> bits) & 0x3F) | 0x80;	         } ' 	processed = (const unsigned char*) in;      }      *outlen = out - outstart;      *inlenb = processed - inb;     return(0); }    /**   * UTF8ToUTF16BE: =  * @outb:  a pointer to an array of bytes to store the result    * @outlen:  the length of @outb-  * @in:  a pointer to an array of UTF-8 chars   * @inlen:  the length of @in   *F  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE  * block of chars out.  *D  * Returns the number of byte written, or -1 by lack of space, or -2"  *     if the transcoding failed.   */  int / UTF8ToUTF16BE(unsigned char* outb, int *outlen, 0             const unsigned char* in, int *inlen) { 1     unsigned short* out = (unsigned short*) outb; (     const unsigned char* processed = in;"     unsigned short* outstart= out;     unsigned short* outend; *     const unsigned char* inend= in+*inlen;     unsigned int c, d;     int trailing;      unsigned char *tmp;      unsigned short tmp1, tmp2;       if (in == NULL) { 
         /*+ 	 * initialization, add the Byte Order Mark  	 */         if (*outlen >= 2) {  	    outb[0] = 0xFE; 	    outb[1] = 0xFF; 	    *outlen = 2;  	    *inlen = 0; #ifdef DEBUG_ENCODING <             fprintf(stderr, "Added FEFF Byte Order Mark\n"); #endif 	    return(2);  	}
 	*outlen = 0;  	*inlen = 0; 	return(0);      } !     outend = out + (*outlen / 2);      while (in < inend) {       d= *in++; 0       if      (d < 0x80)  { c= d; trailing= 0; }       else if (d < 0xC0)  { 1           /* trailing byte in leading position */  	  *outlen = out - outstart; 	  *inlen = processed - in;  	  return(-2);9       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; } 7       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; } 7       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }        else {,           /* no chance for this in UTF-16 */ 	  *outlen = out - outstart; 	  *inlen = processed - in;  	  return(-2);       }   "       if (inend - in < trailing) {           break;       }   %       for ( ; trailing; trailing--) { E           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;            c <<= 6;           c |= d & 0x3F;       }   0       /* assertion: c is a single UTF-4 value */         if (c < 0x10000) {&             if (out >= outend)  break; 	    if (xmlLittleEndian) {  		tmp = (unsigned char *) out; 		*tmp = c >> 8; 		*(tmp + 1) = c;  		out++;
 	    } else { 
 		*out++ = c;  	    }	         }           else if (c < 0x110000) {(             if (out+1 >= outend)  break;             c -= 0x10000;  	    if (xmlLittleEndian) {  		tmp1 = 0xD800 | (c >> 10); 		tmp = (unsigned char *) out; 		*tmp = tmp1 >> 8; $ 		*(tmp + 1) = (unsigned char) tmp1; 		out++;   		tmp2 = 0xDC00 | (c & 0x03FF);  		tmp = (unsigned char *) out; 		*tmp = tmp2 >> 8; $ 		*(tmp + 1) = (unsigned char) tmp2; 		out++;
 	    } else {  		*out++ = 0xD800 | (c >> 10);! 		*out++ = 0xDC00 | (c & 0x03FF);  	    }	         }          else 	    break;  	processed = in;     } #     *outlen = (out - outstart) * 2;      *inlen = processed - in;     return(0); }    /**   * xmlDetectCharEncoding: I  * @in:  a pointer to the first bytes of the XML entity, must be at least   *       4 bytes long.-  * @len:  pointer to the length of the buffer   *O  * Guess the encoding of the entity using the first bytes of the entity content M  * accordingly of the non-normative appendix F of the XML-1.0 recommendation.   *  3  * Returns one of the XML_CHAR_ENCODING_... values.   */  xmlCharEncoding 7 xmlDetectCharEncoding(const unsigned char* in, int len)  {      if (len >= 4) { * 	if ((in[0] == 0x00) && (in[1] == 0x00) &&( 	    (in[2] == 0x00) && (in[3] == 0x3C))& 	    return(XML_CHAR_ENCODING_UCS4BE);* 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&( 	    (in[2] == 0x00) && (in[3] == 0x00))& 	    return(XML_CHAR_ENCODING_UCS4LE);* 	if ((in[0] == 0x00) && (in[1] == 0x00) &&( 	    (in[2] == 0x3C) && (in[3] == 0x00))) 	    return(XML_CHAR_ENCODING_UCS4_2143); * 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&( 	    (in[2] == 0x00) && (in[3] == 0x00))) 	    return(XML_CHAR_ENCODING_UCS4_3412); * 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&( 	    (in[2] == 0xA7) && (in[3] == 0x94))& 	    return(XML_CHAR_ENCODING_EBCDIC);* 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&( 	    (in[2] == 0x78) && (in[3] == 0x6D))$ 	    return(XML_CHAR_ENCODING_UTF8);     }      if (len >= 2) { ( 	if ((in[0] == 0xFE) && (in[1] == 0xFF))' 	    return(XML_CHAR_ENCODING_UTF16BE); ( 	if ((in[0] == 0xFF) && (in[1] == 0xFE))' 	    return(XML_CHAR_ENCODING_UTF16LE);      } #     return(XML_CHAR_ENCODING_NONE);  }    /**   * xmlCleanupEncodingAliases:   *  * Unregisters all aliases  */  void! xmlCleanupEncodingAliases(void) { 
     int i;  '     if (xmlCharEncodingAliases == NULL)  	return;  2     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {, 	if (xmlCharEncodingAliases[i].name != NULL)6 	    xmlFree((char *) xmlCharEncodingAliases[i].name);- 	if (xmlCharEncodingAliases[i].alias != NULL) 7 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);      } !     xmlCharEncodingAliasesNb = 0; "     xmlCharEncodingAliasesMax = 0;$     xmlFree(xmlCharEncodingAliases); }    /**   * xmlGetEncodingAlias: F  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)  */  * Lookup an encoding name for the given alias.   *  8  * Returns NULL if not found the original name otherwise  */  const char *( xmlGetEncodingAlias(const char *alias) {
     int i;     char upper[100];       if (alias == NULL) 	return(NULL);  '     if (xmlCharEncodingAliases == NULL)  	return(NULL);       for (i = 0;i < 99;i++) {%         upper[i] = toupper(alias[i]);  	if (upper[i] == 0) break;     }      upper[i] = 0;        /*?      * Walk down the list looking for a definition of the alias       */ 2     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {7 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { , 	    return(xmlCharEncodingAliases[i].name); 	}     }      return(NULL);  }    /**   * xmlAddEncodingAlias: H  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)F  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)  *I  * Registers and alias @alias for an encoding named @name. Existing alias   * will be overwritten.   *  4  * Returns 0 in case of success, -1 in case of error  */  int : xmlAddEncodingAlias(const char *name, const char *alias) {
     int i;     char upper[100];  *     if ((name == NULL) || (alias == NULL)) 	return(-1);       for (i = 0;i < 99;i++) {%         upper[i] = toupper(alias[i]);  	if (upper[i] == 0) break;     }      upper[i] = 0;   )     if (xmlCharEncodingAliases == NULL) {  	xmlCharEncodingAliasesNb = 0;  	xmlCharEncodingAliasesMax = 20;4 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) K 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); $ 	if (xmlCharEncodingAliases == NULL) 	    return(-1);G     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {   	xmlCharEncodingAliasesMax *= 2;4 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) ) 	      xmlRealloc(xmlCharEncodingAliases, E 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));      }      /*?      * Walk down the list looking for a definition of the alias       */ 2     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {7 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {  	    /*  	     * Replace the definition.  	     */6 	    xmlFree((char *) xmlCharEncodingAliases[i].name);9 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);  	    return(0);  	}     }      /*      * Add the definition       */ O     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); Q     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);      xmlCharEncodingAliasesNb++;      return(0); }    /**   * xmlDelEncodingAlias: F  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)  *'  * Unregisters an encoding alias @alias   *  4  * Returns 0 in case of success, -1 in case of error  */  int ( xmlDelEncodingAlias(const char *alias) {
     int i;       if (alias == NULL) 	return(-1);  '     if (xmlCharEncodingAliases == NULL)  	return(-1);     /*?      * Walk down the list looking for a definition of the alias       */ 2     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {7 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 6 	    xmlFree((char *) xmlCharEncodingAliases[i].name);7 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);   	    xmlCharEncodingAliasesNb--;H 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],E 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));  	    return(0);  	}     }      return(-1);  }    /**   * xmlParseCharEncoding:H  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)  *G  * Conpare the string to the known encoding schemes already known. Note E  * that the comparison is case insensitive accordingly to the section .  * [XML] 4.3.3 Character Encoding in Entities.  *  L  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE  * if not recognized.   */  xmlCharEncoding & xmlParseCharEncoding(const char* name) {      const char *alias;     char upper[500];
     int i;       if (name == NULL)   	return(XML_CHAR_ENCODING_NONE);       /*      * Do the alias resolution      */ &     alias = xmlGetEncodingAlias(name);     if (alias != NULL) 	name = alias;       for (i = 0;i < 499;i++) { $         upper[i] = toupper(name[i]); 	if (upper[i] == 0) break;     }      upper[i] = 0;   ;     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); @     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);?     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);        /*F      * NOTE: if we were able to parse this, the endianness of UTF16 is%      *       already found and in use       */ D     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);C     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);      J     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);@     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);?     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);        /*E      * NOTE: if we were able to parse this, the endianness of UCS4 is %      *       already found and in use       */ L     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);B     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);A     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);        H     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);I     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); I     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);   H     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);I     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); I     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);   H     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);H     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);H     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);H     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);H     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);H     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);H     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);  I     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); I     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); C     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);    #ifdef DEBUG_ENCODING 3     fprintf(stderr, "Unknown encoding %s\n", name);  #endif$     return(XML_CHAR_ENCODING_ERROR); }    /**   * xmlGetCharEncodingName:  * @enc:  the encoding  *)  * The "canonical" name for XML encoding. 1  * C.f. http://www.w3.org/TR/REC-xml#charencoding 0  * Section 4.3.3  Character Encoding in Entities  *4  * Returns the canonical name for the given encoding  */    const char* - xmlGetCharEncodingName(xmlCharEncoding enc) {      switch (enc) {%         case XML_CHAR_ENCODING_ERROR:  	    return(NULL);$         case XML_CHAR_ENCODING_NONE: 	    return(NULL);$         case XML_CHAR_ENCODING_UTF8: 	    return("UTF-8"); '         case XML_CHAR_ENCODING_UTF16LE:  	    return("UTF-16");'         case XML_CHAR_ENCODING_UTF16BE:  	    return("UTF-16");&         case XML_CHAR_ENCODING_EBCDIC:             return("EBCDIC"); &         case XML_CHAR_ENCODING_UCS4LE:&             return("ISO-10646-UCS-4");&         case XML_CHAR_ENCODING_UCS4BE:&             return("ISO-10646-UCS-4");)         case XML_CHAR_ENCODING_UCS4_2143: &             return("ISO-10646-UCS-4");)         case XML_CHAR_ENCODING_UCS4_3412: &             return("ISO-10646-UCS-4");$         case XML_CHAR_ENCODING_UCS2:&             return("ISO-10646-UCS-2");&         case XML_CHAR_ENCODING_8859_1: 	    return("ISO-8859-1");&         case XML_CHAR_ENCODING_8859_2: 	    return("ISO-8859-2");&         case XML_CHAR_ENCODING_8859_3: 	    return("ISO-8859-3");&         case XML_CHAR_ENCODING_8859_4: 	    return("ISO-8859-4");&         case XML_CHAR_ENCODING_8859_5: 	    return("ISO-8859-5");&         case XML_CHAR_ENCODING_8859_6: 	    return("ISO-8859-6");&         case XML_CHAR_ENCODING_8859_7: 	    return("ISO-8859-7");&         case XML_CHAR_ENCODING_8859_8: 	    return("ISO-8859-8");&         case XML_CHAR_ENCODING_8859_9: 	    return("ISO-8859-9");'         case XML_CHAR_ENCODING_2022_JP: "             return("ISO-2022-JP");)         case XML_CHAR_ENCODING_SHIFT_JIS:               return("Shift-JIS");&         case XML_CHAR_ENCODING_EUC_JP:             return("EUC-JP");  	case XML_CHAR_ENCODING_ASCII: 	    return(NULL);     }      return(NULL);  }   A /****************************************************************   *								*   *		Char encoding handlers				*   *								* B  ****************************************************************/  > /* the size should be growable, but it's not a big deal ... */  #define MAX_ENCODING_HANDLERS 502 static xmlCharEncodingHandlerPtr *handlers = NULL;% static int nbCharEncodingHandler = 0;    /*E  * The default is UTF-8 for XML, that's also the default used for the <  * parser internals, so the default encoding handler is NULL  */   F static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;   /**   * xmlNewCharEncodingHandler: >  * @name:  the encoding name, in UTF-8 format (ASCII actually)>  * @input:  the xmlCharEncodingInputFunc to read that encodingA  * @output:  the xmlCharEncodingOutputFunc to write that encoding   *2  * Create and registers an xmlCharEncodingHandler.L  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).  */  xmlCharEncodingHandlerPtr , xmlNewCharEncodingHandler(const char *name, 9                           xmlCharEncodingInputFunc input, =                           xmlCharEncodingOutputFunc output) { &     xmlCharEncodingHandlerPtr handler;     const char *alias;     char upper[500];
     int i;     char *up = 0;        /*      * Do the alias resolution      */ &     alias = xmlGetEncodingAlias(name);     if (alias != NULL) 	name = alias;       /*7      * Keep only the uppercase version of the encoding.       */      if (name == NULL) { C         fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n");  	return(NULL);     }      for (i = 0;i < 499;i++) { $         upper[i] = toupper(name[i]); 	if (upper[i] == 0) break;     }      upper[i] = 0;      up = xmlMemStrdup(upper);      if (up == NULL) { I         fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");  	return(NULL);     }        /*-      * allocate and fill-up an handler block.       */ )     handler = (xmlCharEncodingHandlerPtr) 8               xmlMalloc(sizeof(xmlCharEncodingHandler));     if (handler == NULL) {I         fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");  	return(NULL);     }      handler->input = input;      handler->output = output;      handler->name = up;    #ifdef LIBXML_ICONV_ENABLED      handler->iconv_in = NULL;      handler->iconv_out = NULL;! #endif /* LIBXML_ICONV_ENABLED */        /*)      * registers and returns the handler.       */ ,     xmlRegisterCharEncodingHandler(handler); #ifdef DEBUG_ENCODING B     fprintf(stderr, "Registered encoding handler for %s\n", name); #endif     return(handler); }    /**   * xmlInitCharEncodingHandlers:   *A  * Initialize the char encoding support, it registers the default   * encoding supported.F  * NOTE: while public, this function usually doesn't need to be called  *       in normal processing.  */  void# xmlInitCharEncodingHandlers(void) { $     unsigned short int tst = 0x1234;1     unsigned char *ptr = (unsigned char *) &tst;    !     if (handlers != NULL) return;   ,     handlers = (xmlCharEncodingHandlerPtr *)M         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));   *     if (*ptr == 0x12) xmlLittleEndian = 0;/     else if (*ptr == 0x34) xmlLittleEndian = 1; B     else fprintf(stderr, "Odd problem at endianness detection\n");       if (handlers == NULL) { K         fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");  	return;     } 3     xmlNewCharEncodingHandler("UTF-8", NULL, NULL);      xmlUTF16LEHandler = N           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);     xmlUTF16BEHandler = N           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);J     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);A     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);  #ifdef LIBXML_HTML_ENABLED8     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); #endif }    /** "  * xmlCleanupCharEncodingHandlers:  *A  * Cleanup the memory allocated for the char encoding support, it 9  * unregisters all the encoding handlers and the aliases.   */  void& xmlCleanupCharEncodingHandlers(void) {      xmlCleanupEncodingAliases();  !     if (handlers == NULL) return;   '     for (;nbCharEncodingHandler > 0;) {           nbCharEncodingHandler--;/ 	if (handlers[nbCharEncodingHandler] != NULL) { 7 	    if (handlers[nbCharEncodingHandler]->name != NULL) 1 		xmlFree(handlers[nbCharEncodingHandler]->name); . 	    xmlFree(handlers[nbCharEncodingHandler]); 	}     }      xmlFree(handlers);     handlers = NULL;     nbCharEncodingHandler = 0;)     xmlDefaultCharEncodingHandler = NULL;  }    /** "  * xmlRegisterCharEncodingHandler:9  * @handler:  the xmlCharEncodingHandlerPtr handler block   *=  * Register the char encoding handler, surprizing, isn't it ?   */  voidC xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 8     if (handlers == NULL) xmlInitCharEncodingHandlers();     if (handler == NULL) {L         fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n"); 	return;     }   9     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {          fprintf(stderr, B 	"xmlRegisterCharEncodingHandler: Too many handler registered\n");M         fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);  	return;     } 0     handlers[nbCharEncodingHandler++] = handler; }    /**   * xmlGetCharEncodingHandler: #  * @enc:  an xmlCharEncoding value.   *N  * Search in the registrered set the handler able to read/write that encoding.  *+  * Returns the handler or NULL if not found   */  xmlCharEncodingHandlerPtr 0 xmlGetCharEncodingHandler(xmlCharEncoding enc) {&     xmlCharEncodingHandlerPtr handler;  8     if (handlers == NULL) xmlInitCharEncodingHandlers();     switch (enc) {%         case XML_CHAR_ENCODING_ERROR:  	    return(NULL);$         case XML_CHAR_ENCODING_NONE: 	    return(NULL);$         case XML_CHAR_ENCODING_UTF8: 	    return(NULL);'         case XML_CHAR_ENCODING_UTF16LE:  	    return(xmlUTF16LEHandler); '         case XML_CHAR_ENCODING_UTF16BE:  	    return(xmlUTF16BEHandler); &         case XML_CHAR_ENCODING_EBCDIC:;             handler = xmlFindCharEncodingHandler("EBCDIC"); 1             if (handler != NULL) return(handler); ;             handler = xmlFindCharEncodingHandler("ebcdic"); 1             if (handler != NULL) return(handler);  	    break; &         case XML_CHAR_ENCODING_UCS4BE:D             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");1             if (handler != NULL) return(handler); :             handler = xmlFindCharEncodingHandler("UCS-4");1             if (handler != NULL) return(handler); 9             handler = xmlFindCharEncodingHandler("UCS4"); 1             if (handler != NULL) return(handler);  	    break; &         case XML_CHAR_ENCODING_UCS4LE:D             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");1             if (handler != NULL) return(handler); :             handler = xmlFindCharEncodingHandler("UCS-4");1             if (handler != NULL) return(handler); 9             handler = xmlFindCharEncodingHandler("UCS4"); 1             if (handler != NULL) return(handler);  	    break; )         case XML_CHAR_ENCODING_UCS4_2143:  	    break; )         case XML_CHAR_ENCODING_UCS4_3412:  	    break; $         case XML_CHAR_ENCODING_UCS2:D             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");1             if (handler != NULL) return(handler); :             handler = xmlFindCharEncodingHandler("UCS-2");1             if (handler != NULL) return(handler); 9             handler = xmlFindCharEncodingHandler("UCS2"); 1             if (handler != NULL) return(handler);  	    break;    	    /* 9 	     * We used to keep ISO Latin encodings native in the 9 	     * generated data. This led to so many problems that 8 	     * this has been removed. One can still change this5 	     * back by registering no-ops encoders for those  	     */&         case XML_CHAR_ENCODING_8859_1:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_2:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_3:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_4:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_5:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_6:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_7:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_8:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");* 	    if (handler != NULL) return(handler); 	    break; &         case XML_CHAR_ENCODING_8859_9:8 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");* 	    if (handler != NULL) return(handler); 	    break;     '         case XML_CHAR_ENCODING_2022_JP: @             handler = xmlFindCharEncodingHandler("ISO-2022-JP");1             if (handler != NULL) return(handler);  	    break; )         case XML_CHAR_ENCODING_SHIFT_JIS: >             handler = xmlFindCharEncodingHandler("SHIFT-JIS");1             if (handler != NULL) return(handler); >             handler = xmlFindCharEncodingHandler("SHIFT_JIS");1             if (handler != NULL) return(handler); >             handler = xmlFindCharEncodingHandler("Shift_JIS");1             if (handler != NULL) return(handler);  	    break; &         case XML_CHAR_ENCODING_EUC_JP:;             handler = xmlFindCharEncodingHandler("EUC-JP"); 1             if (handler != NULL) return(handler);  	    break; 
 	default:  	    break;      }       #ifdef DEBUG_ENCODING ?     fprintf(stderr, "No handler found for encoding %d\n", enc);  #endif     return(NULL);  }    /**   * xmlGetCharEncodingHandler: 0  * @enc:  a string describing the char encoding.  *N  * Search in the registrered set the handler able to read/write that encoding.  *+  * Returns the handler or NULL if not found   */  xmlCharEncodingHandlerPtr . xmlFindCharEncodingHandler(const char *name) {     const char *nalias;      const char *norig;     xmlCharEncoding alias; #ifdef LIBXML_ICONV_ENABLED "     xmlCharEncodingHandlerPtr enc;     iconv_t icv_in, icv_out;! #endif /* LIBXML_ICONV_ENABLED */      char upper[100];
     int i;  8     if (handlers == NULL) xmlInitCharEncodingHandlers();<     if (name == NULL) return(xmlDefaultCharEncodingHandler);<     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);       /*      * Do the alias resolution      */      norig = name; '     nalias = xmlGetEncodingAlias(name);      if (nalias != NULL)  	name = nalias;        /*9      * Check first for directly registered encoding names       */      for (i = 0;i < 99;i++) {$         upper[i] = toupper(name[i]); 	if (upper[i] == 0) break;     }      upper[i] = 0;   .     for (i = 0;i < nbCharEncodingHandler; i++)0         if (!strcmp(upper, handlers[i]->name)) { #ifdef DEBUG_ENCODING P             fprintf(stderr, "Found registered handler for encoding %s\n", name); #endif 	    return(handlers[i]);  	}   #ifdef LIBXML_ICONV_ENABLED -     /* check whether iconv can handle this */ '     icv_in = iconv_open("UTF-8", name); (     icv_out = iconv_open(name, "UTF-8");@     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {& 	    enc = (xmlCharEncodingHandlerPtr)5 	          xmlMalloc(sizeof(xmlCharEncodingHandler));  	    if (enc == NULL) {  	        iconv_close(icv_in);  	        iconv_close(icv_out); 		return(NULL);  	    }$ 	    enc->name = xmlMemStrdup(name); 	    enc->input = NULL;  	    enc->output = NULL; 	    enc->iconv_in = icv_in; 	    enc->iconv_out = icv_out; #ifdef DEBUG_ENCODING K             fprintf(stderr, "Found iconv handler for encoding %s\n", name);  #endif 	    return enc;E     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { G 	    fprintf(stderr, "iconv : problems with filters for '%s'\n", name);      } ! #endif /* LIBXML_ICONV_ENABLED */    #ifdef DEBUG_ENCODING @     fprintf(stderr, "No handler found for encoding %s\n", name); #endif       /*)      * Fallback using the canonical names       */ (     alias = xmlParseCharEncoding(norig);+     if (alias != XML_CHAR_ENCODING_ERROR) {          const char* canon;.         canon = xmlGetCharEncodingName(alias);7         if ((canon != NULL) && (strcmp(name, canon))) { / 	    return(xmlFindCharEncodingHandler(canon)); 	         }      }        return(NULL);  }    #ifdef LIBXML_ICONV_ENABLED  /**   * xmlIconvWrapper: '  * @cd:		iconv converter data structure <  * @out:  a pointer to an array of bytes to store the result  * @outlen:  the length of @out 3  * @in:  a pointer to an array of ISO Latin 1 chars   * @inlen:  the length of @in   *  * Returns 0 if success, or   *     -1 by lack of space, orG  *     -2 if the transcoding fails (for *in is not valid utf8 string or O  *        the result of transformation can't fit into the encoding we want), or A  *     -3 if there the last byte can't form a single output char.   *      D  * The value of @inlen after return is the number of octets consumed<  *     as the return value is positive, else unpredictiable.F  * The value of @outlen after return is the number of ocetes consumed.  */ 
 static int xmlIconvWrapper(iconv_t cd, ! 	unsigned char *out, int *outlen, ' 	const unsigned char *in, int *inlen) {   1 	size_t icv_inlen = *inlen, icv_outlen = *outlen; ( 	const char *icv_in = (const char *) in; 	char *icv_out = (char *) out;	 	int ret;    	ret = iconv(cd, 		&icv_in, &icv_inlen, 		&icv_out, &icv_outlen);  	if (in != NULL) { 	    *inlen -= icv_inlen;  	    *outlen -= icv_outlen; 	 	} else {  	    *inlen = 0; 	    *outlen = 0;  	}, 	if (icv_inlen != 0 || ret == (size_t) -1) {
 #ifdef EILSEQ  		if (errno == EILSEQ) {
 			return -2;  		} else #endif #ifdef E2BIG 		if (errno == E2BIG) { 
 			return -1;  		} else #endif
 #ifdef EINVAL  		if (errno == EINVAL) {
 			return -3;  		}  #endif 		else {
 			return -3;  		}  	}
 	return 0; } ! #endif /* LIBXML_ICONV_ENABLED */    /**   * xmlCharEncFirstLine: 9  * @handler:	char enconding transformation data structure &  * @out:  an xmlBuffer for the output.#  * @in:  an xmlBuffer for the input   *      E  * Front-end for the encoding handler input function, but handle only 6  * the very first line, i.e. limit itself to 45 chars.  *      5  * Returns the number of byte written if success, or    *     -1 general error G  *     -2 if the transcoding fails (for *in is not valid utf8 string or O  *        the result of transformation can't fit into the encoding we want), or   */  int F xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,#                  xmlBufferPtr in) {      int ret = -2;      int written;     int toconv;   $     if (handler == NULL) return(-1);      if (out == NULL) return(-1);     if (in == NULL) return(-1);   #     written = out->size - out->use;      toconv = in->use;       if (toconv * 2 >= written) {#         xmlBufferGrow(out, toconv); $ 	written = out->size - out->use - 1;     }        /*A      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 E      * 45 chars should be sufficient to reach the end of the encoding E      * decalration without going too far inside the document content.       */      written = 45;   !     if (handler->input != NULL) { 8 	ret = handler->input(&out->content[out->use], &written,, 	                     in->content, &toconv); 	xmlBufferShrink(in, toconv);  	out->use += written;  	out->content[out->use] = 0;     }  #ifdef LIBXML_ICONV_ENABLED )     else if (handler->iconv_in != NULL) { B 	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],7 	                      &written, in->content, &toconv);  	xmlBufferShrink(in, toconv);  	out->use += written;  	out->content[out->use] = 0; 	if (ret == -1) ret = -3;      } ! #endif /* LIBXML_ICONV_ENABLED */  #ifdef DEBUG_ENCODING      switch (ret) {         case 0: A 	    fprintf(stderr, "converted %d bytes to %d bytes of input\n",  	            toconv, written); 	    break;          case -1:I 	    fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n", ' 	            toconv, written, in->use);  	    break;          case -2:E 	    fprintf(stderr, "input conversion failed due to input error\n");  	    break;          case -3:I 	    fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n", ' 	            toconv, written, in->use);  	    break; 	 	default: A 	    fprintf(stderr,"Unknown input conversion failed %d\n", ret);      }  #endif     /*4      * Ignore when input buffer is not on a boundary      */      if (ret == -3) ret = 0;      if (ret == -1) ret = 0;      return(ret); }    /**   * xmlCharEncInFunc:9  * @handler:	char enconding transformation data structure &  * @out:  an xmlBuffer for the output.#  * @in:  an xmlBuffer for the input   *      <  * Generic front-end for the encoding handler input function  *      5  * Returns the number of byte written if success, or    *     -1 general error G  *     -2 if the transcoding fails (for *in is not valid utf8 string or O  *        the result of transformation can't fit into the encoding we want), or   */  int C xmlCharEncInFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, #                  xmlBufferPtr in) {      int ret = -2;      int written;     int toconv;   $     if (handler == NULL) return(-1);      if (out == NULL) return(-1);     if (in == NULL) return(-1);        toconv = in->use;      if (toconv == 0) 	return(0); #     written = out->size - out->use;       if (toconv * 2 >= written) {'         xmlBufferGrow(out, toconv * 2); $ 	written = out->size - out->use - 1;     } !     if (handler->input != NULL) { 8 	ret = handler->input(&out->content[out->use], &written,, 	                     in->content, &toconv); 	xmlBufferShrink(in, toconv);  	out->use += written;  	out->content[out->use] = 0;     }  #ifdef LIBXML_ICONV_ENABLED )     else if (handler->iconv_in != NULL) { B 	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],7 	                      &written, in->content, &toconv);  	xmlBufferShrink(in, toconv);  	out->use += written;  	out->content[out->use] = 0; 	if (ret == -1) ret = -3;      } ! #endif /* LIBXML_ICONV_ENABLED */      switch (ret) { #ifdef DEBUG_ENCODING          case 0: A 	    fprintf(stderr, "converted %d bytes to %d bytes of input\n",  	            toconv, written); 	    break;          case -1:I 	    fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n", ' 	            toconv, written, in->use);  	    break;          case -3:I 	    fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n", ' 	            toconv, written, in->use);  	    break;  #endif         case -2:E 	    fprintf(stderr, "input conversion failed due to input error\n"); < 	    fprintf(stderr, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",% 		    in->content[0], in->content[1], & 		    in->content[2], in->content[3]);     }      /*4      * Ignore when input buffer is not on a boundary      */      if (ret == -3) ret = 0;      return(ret); }    /**   * xmlCharEncOutFunc: 9  * @handler:	char enconding transformation data structure &  * @out:  an xmlBuffer for the output.#  * @in:  an xmlBuffer for the input   *      =  * Generic front-end for the encoding handler output function E  * a first call with @in == NULL has to be made firs to initiate the  E  * output in case of non-stateless encoding needing to initiate their /  * state or the output (like the BOM in UTF16). D  * In case of UTF8 sequence conversion errors for the given encoder,D  * the content will be automatically remapped to a CharRef sequence.  *      5  * Returns the number of byte written if success, or    *     -1 general error G  *     -2 if the transcoding fails (for *in is not valid utf8 string or O  *        the result of transformation can't fit into the encoding we want), or   */  int D xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,$                   xmlBufferPtr in) {     int ret = -2;      int written;     int toconv;      int output = 0;   $     if (handler == NULL) return(-1);      if (out == NULL) return(-1);   retry:     #     written = out->size - out->use;        /*I      * First specific handling of in = NULL, i.e. the initialization call       */      if (in == NULL) {          toconv = 0;  	if (handler->output != NULL) { = 	    ret = handler->output(&out->content[out->use], &written,  				  NULL, &toconv);  	    out->use += written;   	    out->content[out->use] = 0; 	} #ifdef LIBXML_ICONV_ENABLED ' 	else if (handler->iconv_out != NULL) { G 	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],  				  &written, NULL, &toconv);  	    out->use += written;   	    out->content[out->use] = 0; 	}! #endif /* LIBXML_ICONV_ENABLED */  #ifdef DEBUG_ENCODING * 	fprintf(stderr, "initialized encoder\n"); #endif         return(0);     }        /*      * Convertion itself.       */      toconv = in->use;      if (toconv == 0) 	return(0);       if (toconv * 2 >= written) {'         xmlBufferGrow(out, toconv * 2); $ 	written = out->size - out->use - 1;     } "     if (handler->output != NULL) {9 	ret = handler->output(&out->content[out->use], &written, - 	                      in->content, &toconv);  	xmlBufferShrink(in, toconv);  	out->use += written;  	out->content[out->use] = 0;     }  #ifdef LIBXML_ICONV_ENABLED *     else if (handler->iconv_out != NULL) {C 	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 7 	                      &written, in->content, &toconv);  	xmlBufferShrink(in, toconv);  	out->use += written;  	out->content[out->use] = 0; 	if (ret == -1) ret = -3;      } ! #endif /* LIBXML_ICONV_ENABLED */ 
     else {> 	fprintf(stderr, "xmlCharEncOutFunc: no output function !\n"); 	return(-1);     }         if (ret >= 0) output += ret;       /*$      * Attempt to handle error cases      */      switch (ret) { #ifdef DEBUG_ENCODING          case 0: B 	    fprintf(stderr, "converted %d bytes to %d bytes of output\n", 	            toconv, written); 	    break;          case -1:D 	    fprintf(stderr, "output conversion failed by lack of space\n"); 	    break;          case -3:I 	    fprintf(stderr,"converted %d bytes to %d bytes of output %d left\n", ' 	            toconv, written, in->use);  	    break;  #endif         case -2: { 	    int len = in->use; 8 	    const xmlChar *utf = (const xmlChar *) in->content;
 	    int cur;   % 	    cur = xmlGetUTF8Char(utf, &len);  	    if (cur > 0) {  		xmlChar charref[20];   #ifdef DEBUG_ENCODING 8 		fprintf(stderr, "handling output conversion error\n");9 		fprintf(stderr, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", " 			in->content[0], in->content[1],# 			in->content[2], in->content[3]);  #endif 		/*; 		 * Removes the UTF8 sequence, and replace it by a charref 9 		 * and continue the transcoding phase, hoping the error & 		 * did not mangle the encoder state. 		 */ + 		sprintf((char *) charref, "&#x%X;", cur);  		xmlBufferShrink(in, len); $ 		xmlBufferAddHead(in, charref, -1);  
 		goto retry; 
 	    } else { B 		fprintf(stderr, "output conversion failed due to conv error\n");9 		fprintf(stderr, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", " 			in->content[0], in->content[1],# 			in->content[2], in->content[3]);  		in->content[0] = ' ';  	    } 	    break;  	}     }      return(ret); }    /**   * xmlCharEncCloseFunc: 9  * @handler:	char enconding transformation data structure   *      9  * Generic front-end for hencoding handler close function   */  * Returns 0 if success, or -1 in case of error   */  int 6 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {     int ret = 0;$     if (handler == NULL) return(-1);*     if (handler->name == NULL) return(-1); #ifdef LIBXML_ICONV_ENABLED      /*C      * Iconv handlers can be oused only once, free the whole block. )      * and the associated icon resources.       */ F     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 	if (handler->name != NULL)  	    xmlFree(handler->name); 	handler->name = NULL;" 	if (handler->iconv_out != NULL) {) 	    if (iconv_close(handler->iconv_out))  		ret = -1;  	    handler->iconv_out = NULL;  	}! 	if (handler->iconv_in != NULL) { ( 	    if (iconv_close(handler->iconv_in)) 		ret = -1;  	    handler->iconv_in = NULL; 	} 	xmlFree(handler);     } ! #endif /* LIBXML_ICONV_ENABLED */  #ifdef DEBUG_ENCODING      if (ret)B         fprintf(stderr, "failed to close the encoding handler\n");     else9         fprintf(stderr, "closed the encoding handler\n");    #endif     return(ret); }   