Thread: [Extractor-gtk-cvslog] SF.net SVN: extractor-gtk:[97] trunk/extractor/generic.c
Extract files from unusual archive formats
Brought to you by:
someone-guy
From: <som...@us...> - 2008-08-22 16:08:49
|
Revision: 97
          http://extractor-gtk.svn.sourceforge.net/extractor-gtk/?rev=97&view=rev
Author:   someone-guy
Date:     2008-08-22 16:08:44 +0000 (Fri, 22 Aug 2008)

Log Message:
-----------
Optimize generic file parsing.

Modified Paths:
--------------
    trunk/extractor/generic.c

Modified: trunk/extractor/generic.c
===================================================================
--- trunk/extractor/generic.c 2008-08-22 16:08:20 UTC (rev 96)
+++ trunk/extractor/generic.c 2008-08-22 16:08:44 UTC (rev 97)
@@ -31,56 +31,99 @@
 
 typedef uint64_t buffer_t;
 
+#define RAMBUF_SZ (256 * 1024)
+
+static inline uint16_t get_le16(uint8_t **p) {
+  uint16_t res = *(*p)++;
+  res += *(*p)++ << 8;
+  return res;
+}
+
+static inline uint16_t get_be16(uint8_t **p) {
+  uint16_t res = *(*p)++ << 8;
+  res += *(*p)++;
+  return res;
+}
+
+static inline uint32_t get_le32(uint8_t **p) {
+  uint32_t res = get_le16(p);
+  res += get_le16(p) << 16;
+  return res;
+}
+
+static inline uint32_t get_be32(uint8_t **p) {
+  uint32_t res = get_be16(p) << 16;
+  res += get_be16(p);
+  return res;
+}
+
 static file_t *get_list(FILE *in) {
   register buffer_t t = 0;
+  uint8_t *rambuf = malloc(2 * RAMBUF_SZ);
+  uint64_t fpos = 0;
+  int rambuf_used = 0;
+  int rambuf_pos = 0;
   int bz2idx = -1;
   int pngidx = -1;
   int cnt = 0;
   file_t *list = calloc(1, sizeof(file_t));
-  rewind(in);
-  while (!feof(in) && cnt < MAX_FILES) {
+  while (cnt < MAX_FILES) {
+    uint8_t *p;
     int i;
-    t = t << 8 | fgetc(in);
+    if (rambuf_pos >= rambuf_used || rambuf_pos >= RAMBUF_SZ) {
+      if (rambuf_pos >= RAMBUF_SZ) {
+        rambuf_pos -= RAMBUF_SZ;
+        rambuf_used -= RAMBUF_SZ;
+        memcpy(rambuf, rambuf + RAMBUF_SZ, RAMBUF_SZ);
+      } else {
+        rambuf_pos = rambuf_used = 0;
+      }
+      rambuf_used += fread(rambuf + rambuf_used, 1, 2 * RAMBUF_SZ - rambuf_used, in);
+      if (rambuf_used <= 0) break; // EOF
+    }
+    t = t << 8 | rambuf[rambuf_pos++];
+    p = rambuf + rambuf_pos;
+    fpos++;
     switch (t) {
       case HUGETAG(0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1):
         add_entry(&list, cnt, "msi");
-        list[cnt].start = ftell(in) - 8;
+        list[cnt].start = fpos - 8;
         cnt++;
         break;
       case HUGETAG(0x89, 'P', 'N', 'G', 0x0d, 0x0a, 0x1a, 0x0a):
         if (pngidx >= 0 && list[pngidx].len == 0)
-          list[pngidx].len = ftell(in) - list[pngidx].start - 8;
+          list[pngidx].len = fpos - list[pngidx].start - 8;
         add_entry(&list, cnt, "png");
-        list[cnt].start = ftell(in) - 8;
+        list[cnt].start = fpos - 8;
         pngidx = cnt;
         cnt++;
         break;
       case HUGETAG(0, 0, 0, 0, 'I', 'E', 'N', 'D'):
         if (pngidx >= 0)
-          list[pngidx].len = ftell(in) - list[pngidx].start + 4;
+          list[pngidx].len = fpos - list[pngidx].start + 4;
         break;
     }
-    switch (t & 0xffffffff) {
+    switch ((uint32_t)t) {
       case TAG('M', 'S', 'C', 'F'): {
-        uint32_t unk = read_le32(in);
-        uint32_t size = read_le32(in);
-        fseek(in, -8, SEEK_CUR);
+        uint32_t unk = get_le32(&p);
+        uint32_t size = get_le32(&p);
+        p -= 8;
         add_entry(&list, cnt, "cab");
-        list[cnt].start = ftell(in) - 4;
+        list[cnt].start = fpos - 4;
         list[cnt].len = size;
         cnt++;
         break;
       }
       case TAG('R', 'I', 'F', 'F'): {
         const char *ext = "riff";
-        uint32_t len = read_le32(in);
-        uint32_t type = read_be32(in);
-        fseek(in, -8, SEEK_CUR);
+        uint32_t len = get_le32(&p);
+        uint32_t type = get_be32(&p);
+        p -= 8;
         if (len <= 4 || !is_valid_fourcc(type)) break;
         if (type == TAG('W', 'A', 'V', 'E')) ext = "wav";
         if (type >> 8 == TAG(0, 'A', 'V', 'I')) ext = "avi";
         add_entry(&list, cnt, ext);
-        list[cnt].start = ftell(in) - 4;
+        list[cnt].start = fpos - 4;
         list[cnt].len = len + 8;
         cnt++;
         break;
@@ -88,16 +131,16 @@
       case TAG('P', 'K', 5, 6): {
         uint64_t offset;
         uint16_t comment_len;
-        fseek(in, 8, SEEK_CUR);
-        offset = read_le32(in); // size of central directory
-        offset += read_le32(in); // offset of central directory
+        p += 8;
+        offset = get_le32(&p); // size of central directory
+        offset += get_le32(&p); // offset of central directory
         offset += 4; // PK..
-        comment_len = read_le16(in); // comment length
-        fseek(in, -18, SEEK_CUR);
-        if (offset > ftell(in))
+        comment_len = get_le16(&p); // comment length
+        p -= 18;
+        if (offset > fpos)
           break;
         add_entry(&list, cnt, "zip");
-        list[cnt].start = ftell(in) - offset;
+        list[cnt].start = fpos - offset;
         list[cnt].len = offset + 18 + comment_len;
         cnt++;
       }
@@ -107,49 +150,49 @@
       // compressed flash. Note that length is _decompressed_ size
       // so we will extract more data than necessary
       case TAG(0, 'C', 'W', 'S'): {
-        uint8_t ver = fgetc(in);
-        uint32_t size = read_le32(in);
-        uint8_t rectbits = fgetc(in) >> 3;
-        fseek(in, -6, SEEK_CUR);
+        uint8_t ver = *p++;
+        uint32_t size = get_le32(&p);
+        uint8_t rectbits = *p++ >> 3;
+        p -= 6;
         if ((ver & 0xf0) || rectbits != 15)
           break;
         add_entry(&list, cnt, "swf");
-        list[cnt].start = ftell(in) - 3;
+        list[cnt].start = fpos - 3;
         list[cnt].len = size;
         cnt++;
         break;
       }
       case TAG(0, 'B', 'Z', 'h'):
         if (bz2idx >= 0 && list[bz2idx].len == 0) {
-          list[bz2idx].start = ftell(in) - 3;
+          list[bz2idx].start = fpos - 3;
           break;
         }
         add_entry(&list, cnt, "bz2");
-        list[cnt].start = ftell(in) - 3;
+        list[cnt].start = fpos - 3;
         bz2idx = cnt;
         cnt++;
         break;
       case 0xffd8ff:
         add_entry(&list, cnt, "jpg");
-        list[cnt].start = ftell(in) - 3;
+        list[cnt].start = fpos - 3;
         list[cnt].priv = (void *)1;
         cnt++;
         break;
     }
-    switch (t & 0xffff) {
+    switch ((uint16_t)t) {
       case TAG(0, 0, 'B', 'M'): {
-        uint32_t len = read_le32(in);
-        uint16_t res1 = read_le16(in);
-        uint16_t res2 = read_le16(in);
-        uint32_t dataoff = read_le32(in);
-        uint32_t bisize = read_le32(in);
-        fseek(in, -16, SEEK_CUR);
+        uint32_t len = get_le32(&p);
+        uint16_t res1 = get_le16(&p);
+        uint16_t res2 = get_le16(&p);
+        uint32_t dataoff = get_le32(&p);
+        uint32_t bisize = get_le32(&p);
+        p -= 16;
         if (bisize < 40 || bisize > 1024*1024 ||
             dataoff < bisize + 14 ||
             len < dataoff || len > 1024*1024*1024)
           break;
         add_entry(&list, cnt, "bmp");
-        list[cnt].start = ftell(in) - 2;
+        list[cnt].start = fpos - 2;
         list[cnt].len = len;
         cnt++;
         break;
@@ -157,7 +200,7 @@
       case 0xffd9:
         for (i = 0; i < cnt; i++) {
           if (list[i].priv == (void *)1 && list[i].len == 0)
-            list[i].len = ftell(in) - list[i].start;
+            list[i].len = fpos - list[i].start;
         }
         break;
     }
@@ -166,17 +209,19 @@
       // if buffer_t is changed to only 32 bits, otherwise all 48 bits
       buffer_t endmark = 0x177245385090ULL;
       buffer_t mask = 0xffffffffffffULL;
-      for (i = 0; i < 8; i++) {
+      i = 8;
+      do {
         if ((t & mask) == endmark) {
           // end marker plus space for CRC
-          list[bz2idx].len = ftell(in) - list[bz2idx].start + 4;
+          list[bz2idx].len = fpos - list[bz2idx].start + 4;
           break;
         }
         endmark <<= 1;
         mask <<= 1;
-      }
+      } while (--i);
     }
   }
+  free(rambuf);
   return list;
 }
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <som...@us...> - 2008-08-22 16:09:28
|
Revision: 99
          http://extractor-gtk.svn.sourceforge.net/extractor-gtk/?rev=99&view=rev
Author:   someone-guy
Date:     2008-08-22 16:09:24 +0000 (Fri, 22 Aug 2008)

Log Message:
-----------
re-add incorrectly removed rewind()

Modified Paths:
--------------
    trunk/extractor/generic.c

Modified: trunk/extractor/generic.c
===================================================================
--- trunk/extractor/generic.c 2008-08-22 16:09:05 UTC (rev 98)
+++ trunk/extractor/generic.c 2008-08-22 16:09:24 UTC (rev 99)
@@ -67,6 +67,7 @@
   int pngidx = -1;
   int cnt = 0;
   file_t *list = calloc(1, sizeof(file_t));
+  rewind(in);
   while (cnt < MAX_FILES) {
     uint8_t *p;
     int i;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <som...@us...> - 2009-02-08 12:42:11
|
Revision: 114
          http://extractor-gtk.svn.sourceforge.net/extractor-gtk/?rev=114&view=rev
Author:   someone-guy
Date:     2009-02-08 12:42:05 +0000 (Sun, 08 Feb 2009)

Log Message:
-----------
Add detection for DV headers.

Modified Paths:
--------------
    trunk/extractor/generic.c

Modified: trunk/extractor/generic.c
===================================================================
--- trunk/extractor/generic.c 2008-09-17 19:00:18 UTC (rev 113)
+++ trunk/extractor/generic.c 2009-02-08 12:42:05 UTC (rev 114)
@@ -57,6 +57,18 @@
   return res;
 }
 
+static inline uint64_t get_le64(uint8_t **p) {
+  uint64_t res = get_le32(p);
+  res += (uint64_t)get_le32(p) << 32;
+  return res;
+}
+
+static inline uint64_t get_be64(uint8_t **p) {
+  uint64_t res = (uint64_t)get_be32(p) << 32;
+  res += get_be32(p);
+  return res;
+}
+
 static file_t *get_list(FILE *in) {
   register buffer_t t = 0;
   uint8_t *rambuf = malloc(2 * RAMBUF_SZ);
@@ -85,6 +97,7 @@
     t = t << 8 | rambuf[rambuf_pos++];
     p = rambuf + rambuf_pos;
     fpos++;
+    // NOTE: at least RAMBUF_SZ bytes after p are valid (unless EOF was reached)
     switch (t) {
       case HUGETAG(0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1):
         add_entry(&list, cnt, "msi");
@@ -105,6 +118,18 @@
         break;
     }
     switch ((uint32_t)t) {
+      case 0x1f07003f:
+      case 0x1f0700bf: {
+        uint32_t dvhdr = get_be32(&p);
+        uint64_t padding = get_be64(&p);
+        p -= 12;
+        if (padding != 0xffffffffffffffff) break;
+        add_entry(&list, cnt, "dv");
+        list[cnt].start = fpos - 4;
+        list[cnt].len = 0;
+        cnt++;
+        break;
+      }
       case TAG('M', 'S', 'C', 'F'): {
         uint32_t unk = get_le32(&p);
         uint32_t size = get_le32(&p);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
From: <som...@us...> - 2010-02-18 19:50:48
|
Revision: 121
          http://extractor-gtk.svn.sourceforge.net/extractor-gtk/?rev=121&view=rev
Author:   someone-guy
Date:     2010-02-18 19:50:36 +0000 (Thu, 18 Feb 2010)

Log Message:
-----------
Add support for detecting and extracting PE executable files.

Modified Paths:
--------------
    trunk/extractor/generic.c

Modified: trunk/extractor/generic.c
===================================================================
--- trunk/extractor/generic.c 2009-09-03 14:15:13 UTC (rev 120)
+++ trunk/extractor/generic.c 2010-02-18 19:50:36 UTC (rev 121)
@@ -220,6 +220,43 @@
         break;
     }
     switch ((uint16_t)t) {
+      case TAG(0, 0, 'M', 'Z'): {
+        // PE exe files
+        int pe_size = 0;
+        uint8_t *start = p;
+        int optsize;
+        uint32_t pe_offset;
+        p += 0x3c - 2;
+        pe_offset = get_le32(&p);
+        if (pe_offset < 0x40 || pe_offset > RAMBUF_SZ / 2)
+          break;
+        p += pe_offset - 0x40;
+        if (get_be32(&p) != TAG('P', 'E', 0, 0))
+          break;
+        p += 0x10;
+        optsize = get_le16(&p);
+        p += 2;
+        if (p - start + optsize >= RAMBUF_SZ / 2)
+          break;
+        p += optsize;
+        while (*p) {
+          // find end of last section
+          int sect_end;
+          if (p - start + 40 >= RAMBUF_SZ / 2)
+            break;
+          p += 16;
+          sect_end = get_le32(&p); // size
+          sect_end += get_le32(&p); // offset
+          if (sect_end > pe_size)
+            pe_size = sect_end;
+          p += 16;
+        } while (*p && p - start + 40 < RAMBUF_SZ / 2);
+        add_entry(&list, cnt, "exe");
+        list[cnt].start = fpos - 2;
+        list[cnt].len = pe_size;
+        cnt++;
+        break;
+      }
       case TAG(0, 0, 'B', 'M'): {
         uint32_t len = get_le32(&p);
         uint16_t res1 = get_le16(&p);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|