[Extractor-gtk-cvslog] SF.net SVN: extractor-gtk:[97] trunk/extractor/generic.c
Extract files from unusual archive formats
Brought to you by:
someone-guy
|
From: <som...@us...> - 2008-08-22 16:08:49
|
Revision: 97
http://extractor-gtk.svn.sourceforge.net/extractor-gtk/?rev=97&view=rev
Author: someone-guy
Date: 2008-08-22 16:08:44 +0000 (Fri, 22 Aug 2008)
Log Message:
-----------
Optimize generic file parsing.
Modified Paths:
--------------
trunk/extractor/generic.c
Modified: trunk/extractor/generic.c
===================================================================
--- trunk/extractor/generic.c 2008-08-22 16:08:20 UTC (rev 96)
+++ trunk/extractor/generic.c 2008-08-22 16:08:44 UTC (rev 97)
@@ -31,56 +31,99 @@
typedef uint64_t buffer_t;
+#define RAMBUF_SZ (256 * 1024)
+
+static inline uint16_t get_le16(uint8_t **p) { // read a little-endian 16-bit value at *p and advance *p by 2; no bounds check is done here
+ uint16_t lo = *(*p)++; // first byte in the stream is the low byte
+ uint16_t hi = *(*p)++; // second byte is the high byte
+ return lo | (hi << 8);
+}
+
+static inline uint16_t get_be16(uint8_t **p) { // read a big-endian 16-bit value at *p and advance *p by 2; no bounds check is done here
+ uint16_t hi = *(*p)++; // first byte in the stream is the high byte
+ uint16_t lo = *(*p)++; // second byte is the low byte
+ return (hi << 8) | lo;
+}
+
+static inline uint32_t get_le32(uint8_t **p) { // read a little-endian 32-bit value at *p and advance *p by 4; no bounds check is done here
+ uint32_t res = get_le16(p); // low 16 bits come first in the stream
+ res += (uint32_t)get_le16(p) << 16; // widen before shifting: uint16_t promotes to signed int, and shifting a value >= 0x8000 left by 16 overflows int
+ return res;
+}
+
+static inline uint32_t get_be32(uint8_t **p) { // read a big-endian 32-bit value at *p and advance *p by 4; no bounds check is done here
+ uint32_t res = (uint32_t)get_be16(p) << 16; // widen before shifting: uint16_t promotes to signed int, and shifting a value >= 0x8000 left by 16 overflows int
+ res += get_be16(p); // low 16 bits follow the high 16 bits in the stream
+ return res;
+}
+
static file_t *get_list(FILE *in) {
register buffer_t t = 0;
+ uint8_t *rambuf = malloc(2 * RAMBUF_SZ);
+ uint64_t fpos = 0;
+ int rambuf_used = 0;
+ int rambuf_pos = 0;
int bz2idx = -1;
int pngidx = -1;
int cnt = 0;
file_t *list = calloc(1, sizeof(file_t));
- rewind(in);
- while (!feof(in) && cnt < MAX_FILES) {
+ while (cnt < MAX_FILES) {
+ uint8_t *p;
int i;
- t = t << 8 | fgetc(in);
+ if (rambuf_pos >= rambuf_used || rambuf_pos >= RAMBUF_SZ) {
+ if (rambuf_pos >= RAMBUF_SZ) {
+ rambuf_pos -= RAMBUF_SZ;
+ rambuf_used -= RAMBUF_SZ;
+ memcpy(rambuf, rambuf + RAMBUF_SZ, RAMBUF_SZ);
+ } else {
+ rambuf_pos = rambuf_used = 0;
+ }
+ rambuf_used += fread(rambuf + rambuf_used, 1, 2 * RAMBUF_SZ - rambuf_used, in);
+ if (rambuf_used <= 0) break; // EOF
+ }
+ t = t << 8 | rambuf[rambuf_pos++];
+ p = rambuf + rambuf_pos;
+ fpos++;
switch (t) {
case HUGETAG(0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1):
add_entry(&list, cnt, "msi");
- list[cnt].start = ftell(in) - 8;
+ list[cnt].start = fpos - 8;
cnt++;
break;
case HUGETAG(0x89, 'P', 'N', 'G', 0x0d, 0x0a, 0x1a, 0x0a):
if (pngidx >= 0 && list[pngidx].len == 0)
- list[pngidx].len = ftell(in) - list[pngidx].start - 8;
+ list[pngidx].len = fpos - list[pngidx].start - 8;
add_entry(&list, cnt, "png");
- list[cnt].start = ftell(in) - 8;
+ list[cnt].start = fpos - 8;
pngidx = cnt;
cnt++;
break;
case HUGETAG(0, 0, 0, 0, 'I', 'E', 'N', 'D'):
if (pngidx >= 0)
- list[pngidx].len = ftell(in) - list[pngidx].start + 4;
+ list[pngidx].len = fpos - list[pngidx].start + 4;
break;
}
- switch (t & 0xffffffff) {
+ switch ((uint32_t)t) {
case TAG('M', 'S', 'C', 'F'): {
- uint32_t unk = read_le32(in);
- uint32_t size = read_le32(in);
- fseek(in, -8, SEEK_CUR);
+ uint32_t unk = get_le32(&p);
+ uint32_t size = get_le32(&p);
+ p -= 8;
add_entry(&list, cnt, "cab");
- list[cnt].start = ftell(in) - 4;
+ list[cnt].start = fpos - 4;
list[cnt].len = size;
cnt++;
break;
}
case TAG('R', 'I', 'F', 'F'): {
const char *ext = "riff";
- uint32_t len = read_le32(in);
- uint32_t type = read_be32(in);
- fseek(in, -8, SEEK_CUR);
+ uint32_t len = get_le32(&p);
+ uint32_t type = get_be32(&p);
+ p -= 8;
if (len <= 4 || !is_valid_fourcc(type)) break;
if (type == TAG('W', 'A', 'V', 'E')) ext = "wav";
if (type >> 8 == TAG(0, 'A', 'V', 'I')) ext = "avi";
add_entry(&list, cnt, ext);
- list[cnt].start = ftell(in) - 4;
+ list[cnt].start = fpos - 4;
list[cnt].len = len + 8;
cnt++;
break;
@@ -88,16 +131,16 @@
case TAG('P', 'K', 5, 6): {
uint64_t offset;
uint16_t comment_len;
- fseek(in, 8, SEEK_CUR);
- offset = read_le32(in); // size of central directory
- offset += read_le32(in); // offset of central directory
+ p += 8;
+ offset = get_le32(&p); // size of central directory
+ offset += get_le32(&p); // offset of central directory
offset += 4; // PK..
- comment_len = read_le16(in); // comment length
- fseek(in, -18, SEEK_CUR);
- if (offset > ftell(in))
+ comment_len = get_le16(&p); // comment length
+ p -= 18;
+ if (offset > fpos)
break;
add_entry(&list, cnt, "zip");
- list[cnt].start = ftell(in) - offset;
+ list[cnt].start = fpos - offset;
list[cnt].len = offset + 18 + comment_len;
cnt++;
}
@@ -107,49 +150,49 @@
// compressed flash. Note that length is _decompressed_ size
// so we will extract more data than necessary
case TAG(0, 'C', 'W', 'S'): {
- uint8_t ver = fgetc(in);
- uint32_t size = read_le32(in);
- uint8_t rectbits = fgetc(in) >> 3;
- fseek(in, -6, SEEK_CUR);
+ uint8_t ver = *p++;
+ uint32_t size = get_le32(&p);
+ uint8_t rectbits = *p++ >> 3;
+ p -= 6;
if ((ver & 0xf0) || rectbits != 15)
break;
add_entry(&list, cnt, "swf");
- list[cnt].start = ftell(in) - 3;
+ list[cnt].start = fpos - 3;
list[cnt].len = size;
cnt++;
break;
}
case TAG(0, 'B', 'Z', 'h'):
if (bz2idx >= 0 && list[bz2idx].len == 0) {
- list[bz2idx].start = ftell(in) - 3;
+ list[bz2idx].start = fpos - 3;
break;
}
add_entry(&list, cnt, "bz2");
- list[cnt].start = ftell(in) - 3;
+ list[cnt].start = fpos - 3;
bz2idx = cnt;
cnt++;
break;
case 0xffd8ff:
add_entry(&list, cnt, "jpg");
- list[cnt].start = ftell(in) - 3;
+ list[cnt].start = fpos - 3;
list[cnt].priv = (void *)1;
cnt++;
break;
}
- switch (t & 0xffff) {
+ switch ((uint16_t)t) {
case TAG(0, 0, 'B', 'M'): {
- uint32_t len = read_le32(in);
- uint16_t res1 = read_le16(in);
- uint16_t res2 = read_le16(in);
- uint32_t dataoff = read_le32(in);
- uint32_t bisize = read_le32(in);
- fseek(in, -16, SEEK_CUR);
+ uint32_t len = get_le32(&p);
+ uint16_t res1 = get_le16(&p);
+ uint16_t res2 = get_le16(&p);
+ uint32_t dataoff = get_le32(&p);
+ uint32_t bisize = get_le32(&p);
+ p -= 16;
if (bisize < 40 || bisize > 1024*1024 ||
dataoff < bisize + 14 || len < dataoff ||
len > 1024*1024*1024)
break;
add_entry(&list, cnt, "bmp");
- list[cnt].start = ftell(in) - 2;
+ list[cnt].start = fpos - 2;
list[cnt].len = len;
cnt++;
break;
@@ -157,7 +200,7 @@
case 0xffd9:
for (i = 0; i < cnt; i++) {
if (list[i].priv == (void *)1 && list[i].len == 0)
- list[i].len = ftell(in) - list[i].start;
+ list[i].len = fpos - list[i].start;
}
break;
}
@@ -166,17 +209,19 @@
// if buffer_t is changed to only 32 bits, otherwise all 48 bits
buffer_t endmark = 0x177245385090ULL;
buffer_t mask = 0xffffffffffffULL;
- for (i = 0; i < 8; i++) {
+ i = 8;
+ do {
if ((t & mask) == endmark) {
// end marker plus space for CRC
- list[bz2idx].len = ftell(in) - list[bz2idx].start + 4;
+ list[bz2idx].len = fpos - list[bz2idx].start + 4;
break;
}
endmark <<= 1;
mask <<= 1;
- }
+ } while (--i);
}
}
+ free(rambuf);
return list;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|