Menu

#44 cJSON can cause segment fault

v1.0 (example)
closed-fixed
None
1
2016-03-19
2015-02-24
kolman
No

cJSON parse_string can cause segment fault when parsing invalid unicode strings (When parsing some content from libcurl, because network problems, the unicode is half of the original string. e.g. \u0571 ==> \u0)

in this function, the length used to allocate memory for ptr2 is determined by ptr, but when parsing ptr, if the unicode is not correct, parse_hex4 will return 0, but ptr is still added by 4 or 6:
....
uc=parse_hex4(ptr);ptr+=4;
....
uc2=parse_hex4(ptr);ptr+=6;

the pointer ptr will jump after the end of the input string, and looks for \" or 0 to end, and keeps on writing memorys pointed by ptr2. But ptr2 memory may not be enough ... and write something to some memory owned by other codes ...

a simple patch will be:

change
static unsigned parse_hex4(const char * str)
to
static unsigned parse_hex4(const char * & str)

and change
uc=parse_hex4(ptr+1)
to:
ptr++;
uc=parse_hex4(ptr);
//ptr+=4; / get the unicode char. /

and change
uc2=parse_hex4(ptr+3);ptr+=6;

to
uc2=parse_hex4(ptr+3);
//ptr+=6;

Discussion

  • kolman

    kolman - 2015-02-24

    a simpler fix:
    cJSON.c file:

    198d197
    <   const char * end_ptr = ptr;
    204c203
    <   while (*ptr!='\"' && *ptr && ptr <= end_ptr)
    ---
    >   while (*ptr!='\"' && *ptr)
    
     

    Last edit: kolman 2015-02-25
  • kolman

    kolman - 2015-02-24
    static const char *parse_string(cJSON *item,const char *str)
    {
        const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
        if (*str!='\"') {ep=str;return 0;}  /* not a string! */
    
        while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++;  /* Skip escaped quotes. */
        const char * end_ptr = ptr;
    
        out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */
        if (!out) return 0;
    
        ptr=str+1;ptr2=out;
        while (*ptr!='\"' && *ptr && ptr <= end_ptr)
        {
            if (*ptr!='\\') *ptr2++=*ptr++;
            else
            {
                ptr++;
                switch (*ptr)
                {
                    case 'b': *ptr2++='\b'; break;
                    case 'f': *ptr2++='\f'; break;
                    case 'n': *ptr2++='\n'; break;
                    case 'r': *ptr2++='\r'; break;
                    case 't': *ptr2++='\t'; break;
                    case 'u':    /* transcode utf16 to utf8. */
                        uc=parse_hex4(ptr+1);ptr+=4;    /* get the unicode char. */
    
                        if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)    break;  /* check for invalid.   */
    
                        if (uc>=0xD800 && uc<=0xDBFF)   /* UTF16 surrogate pairs.   */
                        {
                            if (ptr[1]!='\\' || ptr[2]!='u')    break;  /* missing second-half of surrogate.    */
                            uc2=parse_hex4(ptr+3);ptr+=6;
                            if (uc2<0xDC00 || uc2>0xDFFF)       break;  /* invalid second-half of surrogate.    */
                            uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
                        }
    
                        len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len;
    
                        switch (len) {
                            case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
                            case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
                            case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
                            case 1: *--ptr2 =(uc | firstByteMark[len]);
                        }
                        ptr2+=len;
                        break;
                    default:  *ptr2++=*ptr; break;
                }
                ptr++;
            }
        }
        *ptr2=0;
        if (*ptr=='\"') ptr++;
        item->valuestring=out;
        item->type=cJSON_String;
        return ptr;
    }
    
     

    Last edit: kolman 2015-02-25
  • Irwan Djajadi

    Irwan Djajadi - 2016-01-15

    Thanks kolman !

    However, we found that the suggested patch is still dangerous when parsing invalid json string such as "\u". We end up referencing the buffer beyond the invalid string and could cause crashes.

    We added kolman's patch to address other issues. Attached is our patch version. Now parse_string function looks like the following:

    static const char *parse_string(cJSON *item,const char *str)
    {
        const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2;
        const char *end_ptr;
        if (*str!='\"') {ep=str;return 0;}  /* not a string! */
    
        while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++;  /* Skip escaped quotes. */
        end_ptr = ptr;
    
        out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. */
        if (!out) return 0;
        item->valuestring=out; /* assign here so out will be deleted during cJSON_Delete() later */
        item->type=cJSON_String;
    
        ptr=str+1;ptr2=out;
        while (ptr < end_ptr)
        {
            if (*ptr!='\\') *ptr2++=*ptr++;
            else
            {
                ptr++;
                switch (*ptr)
                {
                    case 'b': *ptr2++='\b'; break;
                    case 'f': *ptr2++='\f'; break;
                    case 'n': *ptr2++='\n'; break;
                    case 'r': *ptr2++='\r'; break;
                    case 't': *ptr2++='\t'; break;
                    case 'u':    /* transcode utf16 to utf8. */
                        uc=parse_hex4(ptr+1);ptr+=4;    /* get the unicode char. */
                        if (ptr >= end_ptr) {ep=str;return 0;} /* invalid */
    
                        if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0)    {ep=str;return 0;}  /* check for invalid.   */
    
                        if (uc>=0xD800 && uc<=0xDBFF)   /* UTF16 surrogate pairs.   */
                        {
                            if (ptr+6 > end_ptr)    {ep=str;return 0;}  /* invalid */
                            if (ptr[1]!='\\' || ptr[2]!='u')    {ep=str;return 0;}  /* missing second-half of surrogate.    */
                            uc2=parse_hex4(ptr+3);ptr+=6;
                            if (uc2<0xDC00 || uc2>0xDFFF)       {ep=str;return 0;}  /* invalid second-half of surrogate.    */
                            uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
                        }
    
                        len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len;
    
                        switch (len) {
                            case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
                            case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
                            case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6;
                            case 1: *--ptr2 =(uc | firstByteMark[len]);
                        }
                        ptr2+=len;
                        break;
                    default:  *ptr2++=*ptr; break;
                }
                ptr++;
            }
        }
        *ptr2=0;
        if (*ptr=='\"') ptr++;
        return ptr;
    }
    
     
  • Dave Gamble

    Dave Gamble - 2016-03-19

    Accepted patch. Everything is moving to github at https://github.com/davegamble/cJSON

    Dave.

     
  • Dave Gamble

    Dave Gamble - 2016-03-19
    • status: unread --> closed-fixed
     

Log in to post a comment.

MongoDB Logo MongoDB