From: Gilles D. <gr...@sc...> - 2001-11-23 23:58:37
|
/*
 * OK, after getting increasingly fed up with all the problems we've had over
 * the past few years with parsing dates in HTTP headers, using mktime,
 * strptime, and a whole mess of different variations of format strings that
 * don't quite always work on all systems, I've decided to go back to square
 * one and write my own code to do this without any help from C library
 * functions that aren't quite portable enough. It's not pretty, but it should
 * solve a lot of the portability problems we've had. It should also make it
 * possible to remove the hack in the locale handling, where we set LC_TIME
 * back to the "C" locale, which had been known to cause locale problems on
 * some systems. So, unless there are objections from other developers, I'm
 * planning to put this code into 3.1.6's htdig/Document.cc next week, as well
 * as eventually into 3.2.0b4's htlib/HtDateTime.cc, to clear up all the
 * problems we've had. I think it will allow us to completely do away with
 * strptime and mktime. I'd appreciate it if you'd have a look at this code
 * and offer your critique.
 */

#include <ctype.h>
#include <time.h>

#define EPOCH 1970

// Upper bound for any numeric field.  Accumulation stops with an error as
// soon as a value exceeds this, so an arbitrarily long run of digits can
// never overflow a long (which is at least 32 bits wide).
#define PARSEDATE_MAX_FIELD 99999L

//
// Returns the lower-case form of an ASCII letter, or 0 if c is not an ASCII
// letter (this includes the terminating NUL).  Deliberately avoids
// tolower()/isalpha() so the result does not depend on the current locale.
// Assumes the letters are contiguous in the execution character set (ASCII).
//
static int parsedate_lower(char c)
{
    if (c >= 'A' && c <= 'Z')
        return c - 'A' + 'a';
    if (c >= 'a' && c <= 'z')
        return c;
    return 0;
}

//
// Returns a pointer past any run of whitespace and/or `sep` characters
// starting at s.  Never steps over the terminating NUL.
//
static const char *parsedate_skip(const char *s, char sep)
{
    while (*s != '\0' && (*s == sep || isspace((unsigned char) *s)))
        s++;
    return s;
}

//
// Parses an unsigned decimal number at *sp and advances *sp past its digits.
// Returns the value, or -1 if there is no digit at *sp or the value exceeds
// PARSEDATE_MAX_FIELD.
//
static long parsedate_number(const char **sp)
{
    const char *s = *sp;
    long value = 0;

    // <ctype.h> functions require an unsigned char value (or EOF); a plain
    // char with the high bit set would be undefined behaviour.
    if (!isdigit((unsigned char) *s))
        return -1;
    while (isdigit((unsigned char) *s)) {
        value = value * 10 + (*s++ - '0');
        if (value > PARSEDATE_MAX_FIELD)
            return -1;
    }
    *sp = s;
    return value;
}

//
// Parses a three-letter month abbreviation (any case) at *sp and advances
// *sp past it.  Returns 1..12, or 0 if the text is not recognised.
//
// As lenient as the original decision tree: only the shortest distinguishing
// prefix is checked ("Ja?" is January, "F??" is February, ...).  All three
// characters must however be letters, so a truncated string can no longer
// make the parser step over the terminating NUL.
//
static int parsedate_month(const char **sp)
{
    const char *s = *sp;
    int letter[3];
    int month = 0;
    int i;

    // s[i] is only read once s[0..i-1] are known to be letters (non-NUL).
    for (i = 0; i < 3; i++) {
        letter[i] = parsedate_lower(s[i]);
        if (letter[i] == 0)
            return 0;
    }

    switch (letter[0]) {
    case 'j':
        if (letter[1] == 'a')
            month = 1;
        else if (letter[1] == 'u' && letter[2] == 'n')
            month = 6;
        else if (letter[1] == 'u' && letter[2] == 'l')
            month = 7;
        break;
    case 'f':
        month = 2;
        break;
    case 'm':
        if (letter[1] == 'a' && letter[2] == 'r')
            month = 3;
        else if (letter[1] == 'a' && letter[2] == 'y')
            month = 5;
        break;
    case 'a':
        if (letter[1] == 'p')
            month = 4;
        else if (letter[1] == 'u')
            month = 8;
        break;
    case 's':
        month = 9;
        break;
    case 'o':
        month = 10;
        break;
    case 'n':
        month = 11;
        break;
    case 'd':
        month = 12;
        break;
    default:
        break;
    }

    if (month != 0)
        *sp = s + 3;
    return month;
}

//
// Maps a Gregorian calendar date to a running day number.  Only differences
// between two results are meaningful.
// This is based somewhat on the date calculation code in NetBSD's
// cd9660_node.c code, for which I was unable to find a reference.
// It works, though!
//
static long parsedate_daynumber(long year, long month, long day)
{
    return 367L * year
         - 7L * (year + (month + 9) / 12) / 4
         - 3L * ((year + (month + 9) / 12 - 1) / 100 + 1) / 4
         + 275L * month / 9
         + day;
}

//
// time_t parsedate(char *date)
//   - converts RFC850 or RFC1123 date string into a time value
//
// date:    NUL-terminated date string; it is not modified.
// returns: seconds since 01 Jan 1970 00:00:00, or 0 if the string cannot be
//          parsed (so a failure cannot be told apart from the epoch itself).
//
// Anything following the seconds field, including the zone name, is ignored;
// the time is always taken as GMT.  The day is only range-checked (1..31),
// not validated against the length of the month.
//
time_t parsedate(char *date)
{
    const char *s;
    long day, month, year, hour, minute, second;
    long days;

    if (date == NULL)
        return 0;

    //
    // Two possible time designations:
    //      Tuesday, 01-Jul-97 16:48:02 GMT    (RFC850)
    // or
    //      Thu, 01 May 1997 00:40:42 GMT      (RFC1123)
    //
    // We strip off the weekday because we don't need it, and
    // because some servers send invalid weekdays!
    // (Some don't even send a weekday, but we'll be flexible...)

    s = date;
    while (*s && *s != ',')
        s++;
    if (*s)
        s++;
    else
        s = date;       // no comma: assume there is no weekday at all
    s = parsedate_skip(s, ' ');

    // get day...
    day = parsedate_number(&s);
    if (day < 1 || day > 31)
        return 0;
    s = parsedate_skip(s, '-');

    // get month...
    month = parsedate_month(&s);
    if (month == 0)
        return 0;
    s = parsedate_skip(s, '-');

    // get year...
    year = parsedate_number(&s);
    if (year < 0)
        return 0;
    if (year < 69)
        year += 2000;
    else if (year < 1900)
        year += 1900;
    else if (year >= 19100)     // seen some programs do it, why not check?
        year -= (19100 - 2000);
    s = parsedate_skip(s, ' ');

    // get hour...
    hour = parsedate_number(&s);
    if (hour < 0 || hour > 23)
        return 0;
    s = parsedate_skip(s, ':');

    // get minute...
    minute = parsedate_number(&s);
    if (minute < 0 || minute > 59)
        return 0;
    s = parsedate_skip(s, ':');

    // get second...
    second = parsedate_number(&s);
    if (second < 0 || second > 59)
        return 0;

    //
    // Calculate date as seconds since 01 Jan 1970 00:00:00 GMT.
    // The day count always fits in a long; the conversion to seconds is
    // done in time_t so it does not overflow where time_t is wider than
    // long.  (Where time_t itself is 32 bits, dates past January 2038
    // still cannot be represented.)
    //
    days = parsedate_daynumber(year, month, day)
         - parsedate_daynumber(EPOCH, 1, 1);

    return (((time_t) days * 24 + hour) * 60 + minute) * 60 + second;
}

#ifdef TEST
#include <stdio.h>

// Test driver: reads one date per line from stdin and prints the parsed time.
int main(void)
{
    char buf[100];

    while (fgets(buf, sizeof(buf), stdin)) {
        time_t t = parsedate(buf);
        const char *text = ctime(&t);

        if (text != NULL)       // ctime() may fail for unrepresentable times
            fputs(text, stdout);
    }
    return 0;
}
#endif

/*
 * --
 * Gilles R. Detillieux              E-mail: <gr...@sc...>
 * Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
 * Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
 * Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930
 */