|
From: Gilles D. <gr...@sc...> - 2001-11-23 23:58:37
|
OK, after getting increasingly fed up with all the problems we've had
over the past few years with parsing dates in HTTP headers, using mktime,
strptime, and a whole mess of different variations of format strings
that don't quite always work on all systems, I've decided to go back
to square one and write my own code to do this without any help from C
library functions that aren't quite portable enough. It's not pretty, but
it should solve a lot of the portability problems we've had. It should
also make it possible to remove the hack in the locale handling, where
we set LC_TIME back to the "C" locale, which had been known to cause
locale problems on some systems.
So, unless there are objections from other developers, I'm planning to
put this code into 3.1.6's htdig/Document.cc next week, as well as
eventually into 3.2.0b4's htlib/HtDateTime.cc, to clear up all the
problems we've had. I think it will allow us to completely do away
with strptime and mktime.
I'd appreciate it if you'd have a look at this code and offer your
critique.
#include <ctype.h>
#include <time.h>
#define EPOCH 1970

//
// Upper bound on any numeric field while it is being accumulated.
// Keeps "n * 10 + digit" far away from integer overflow on absurdly
// long digit strings; anything larger is treated as a parse failure.
//
#define PARSEDATE_MAX_FIELD 999999

//
// static char *pd_skip(char *s, char sep)
//   - returns s advanced past any run of whitespace and 'sep' characters
//     (pass ' ' as sep to skip whitespace only)
//
static char *
pd_skip(char *s, char sep)
{
    // The cast matters: <ctype.h> functions are undefined for negative
    // values, which is what 8-bit characters are where char is signed.
    while (*s == sep || isspace((unsigned char) *s))
        s++;
    return s;
}

//
// static int pd_number(char **sp, int max, int *value)
//   - reads an unsigned decimal number at *sp
//   - on success stores it in *value, advances *sp past the digits and
//     returns 1; returns 0 (leaving *sp and *value untouched) if there
//     is no digit at *sp or the number exceeds max
//
static int
pd_number(char **sp, int max, int *value)
{
    char *s = *sp;
    int n = 0;

    if (!isdigit((unsigned char) *s))
        return 0;
    while (isdigit((unsigned char) *s)) {
        n = n * 10 + (*s++ - '0');
        if (n > max)        // bail out before n can ever overflow
            return 0;
    }
    *sp = s;
    *value = n;
    return 1;
}

//
// static int pd_month(char **sp)
//   - recognizes a month abbreviation at *sp, in either case
//   - returns 1..12 and advances *sp past the (three letter) name, or
//     returns 0 if the name isn't recognized
//
// Only the letters needed to tell the months apart are checked; the
// remaining letters of the abbreviation are skipped without being
// compared, so e.g. "Dez" is taken as December.
//
static int
pd_month(char **sp)
{
    char *s = *sp;
    int month = 0;
    int unchecked = 0;      // trailing letters to skip without comparing

    switch (*s++) {
    case 'J': case 'j':
        switch (*s++) {
        case 'A': case 'a':
            month = 1;
            unchecked = 1;
            break;
        case 'U': case 'u':
            switch (*s++) {
            case 'N': case 'n':
                month = 6;
                break;
            case 'L': case 'l':
                month = 7;
                break;
            default:
                return 0;
            }
            break;
        default:
            return 0;
        }
        break;
    case 'F': case 'f':
        month = 2;
        unchecked = 2;
        break;
    case 'M': case 'm':
        switch (*s++) {
        case 'A': case 'a':
            switch (*s++) {
            case 'R': case 'r':
                month = 3;
                break;
            case 'Y': case 'y':
                month = 5;
                break;
            default:
                return 0;
            }
            break;
        default:
            return 0;
        }
        break;
    case 'A': case 'a':
        switch (*s++) {
        case 'P': case 'p':
            month = 4;
            unchecked = 1;
            break;
        case 'U': case 'u':
            month = 8;
            unchecked = 1;
            break;
        default:
            return 0;
        }
        break;
    case 'S': case 's':
        month = 9;
        unchecked = 2;
        break;
    case 'O': case 'o':
        month = 10;
        unchecked = 2;
        break;
    case 'N': case 'n':
        month = 11;
        unchecked = 2;
        break;
    case 'D': case 'd':
        month = 12;
        unchecked = 2;
        break;
    default:
        return 0;
    }

    // Never step over the terminating NUL of a truncated string.
    while (unchecked-- > 0 && *s)
        s++;

    *sp = s;
    return month;
}

//
// time_t parsedate(char *date)
//   - converts RFC850 or RFC1123 date string into a time value
//   - the string is only read, never modified
//   - returns seconds since 01 Jan EPOCH 00:00:00, or 0 if the string
//     can't be parsed (so a date of exactly the epoch can't be told
//     apart from a failure)
//   - anything following the seconds, such as the zone name, is
//     ignored; the time is taken to be GMT
//
time_t
parsedate(char *date)
{
    char *s;
    int day, month, year, hour, minute, second;
    long days;

    if (!date)
        return 0;

    //
    // Two possible time designations:
    //      Tuesday, 01-Jul-97 16:48:02 GMT    (RFC850)
    // or
    //      Thu, 01 May 1997 00:40:42 GMT      (RFC1123)
    //
    // We strip off the weekday because we don't need it, and
    // because some servers send invalid weekdays!
    // (Some don't even send a weekday, but we'll be flexible...)
    s = date;
    while (*s && *s != ',')
        s++;
    if (*s)
        s++;                // step over the comma
    else
        s = date;           // no comma, so no weekday: start over
    s = pd_skip(s, ' ');

    // get day...
    if (!pd_number(&s, 31, &day) || day < 1)
        return 0;
    s = pd_skip(s, '-');

    // get month...
    month = pd_month(&s);
    if (month == 0)
        return 0;
    s = pd_skip(s, '-');

    // get year...
    if (!pd_number(&s, PARSEDATE_MAX_FIELD, &year))
        return 0;
    if (year < 69)
        year += 2000;                   // 00..68 means 2000..2068
    else if (year < 1900)
        year += 1900;                   // 69..99, and "100" for 2000
    else if (year >= 19100)             // seen some programs do it, why not check?
        year -= (19100-2000);
    s = pd_skip(s, ' ');

    // get hour...
    if (!pd_number(&s, 23, &hour))
        return 0;
    s = pd_skip(s, ':');

    // get minute...
    if (!pd_number(&s, 59, &minute))
        return 0;
    s = pd_skip(s, ':');

    // get second...
    if (!pd_number(&s, 59, &second))
        return 0;

    //
    // Calculate date as seconds since 01 Jan 1970 00:00:00 GMT
    // This is based somewhat on the date calculation code in NetBSD's
    // cd9660_node.c code, for which I was unable to find a reference.
    // It works, though!
    //
    // The first term is a running day number for the parsed date, the
    // second is the same expression evaluated for 1 January of EPOCH;
    // their difference is the number of days since the epoch.
    //
    days = (367L*year - 7L*(year+(month+9)/12)/4
                - 3L*((year+(month+9)/12-1)/100+1)/4
                + 275L*month/9 + day)
         - (367L*EPOCH - 7L*(EPOCH+(1+9)/12)/4
                - 3L*((EPOCH+(1+9)/12-1)/100+1)/4
                + 275L*1/9 + 1);

    // Do the multiplication in time_t rather than long, so that a
    // 32-bit long doesn't overflow where time_t is wider.
    return (((time_t) days * 24 + hour) * 60 + minute) * 60 + second;
}
#ifdef TEST
#include <stdio.h>
//
// Test driver, compiled only when TEST is defined: reads date strings
// from standard input, one per line, and prints what parsedate() made
// of each one, formatted by ctime().
//
int
main(void)
{
    char buf[100];
    char *text;
    time_t t;

    while (fgets(buf, sizeof(buf), stdin)) {
        t = parsedate(buf);
        // ctime() can return a null pointer for a value it can't
        // convert; don't hand that to fputs().
        text = ctime(&t);
        fputs(text ? text : "(time value can't be formatted)\n", stdout);
    }
    return 0;
}
#endif
--
Gilles R. Detillieux E-mail: <gr...@sc...>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
|