You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdepim/mimelib/dw_date.cpp

811 lines
20 KiB

//=============================================================================
// File: dw_date.cpp
// Contents: Date parsing function
// Maintainer: Doug Sauder <dwsauder@fwb.gulf.net>
// WWW: http://www.fwb.gulf.net/~dwsauder/mimepp.html
//
// Copyright (c) 1996, 1997 Douglas W. Sauder
// All rights reserved.
//
// IN NO EVENT SHALL DOUGLAS W. SAUDER BE LIABLE TO ANY PARTY FOR DIRECT,
// INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
// THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF DOUGLAS W. SAUDER
// HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// DOUGLAS W. SAUDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT
// NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
// BASIS, AND DOUGLAS W. SAUDER HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
// SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
//
//=============================================================================
/*
* For maximum code reuse, the functions in this file are written in C.
*/
#include <mimelib/config.h>
#include <mimelib/debug.h>
#include <ctype.h>
#include <time.h>
/*
 * CommentLength() -- Measure a parenthesized RFC-822 comment
 *
 * str is expected to point at the opening '(' of a comment.  Comments may
 * nest, and a backslash quotes the single character that follows it, so a
 * quoted parenthesis does not change the nesting depth.  A quoted backslash
 * is itself literal and does NOT quote the character after it.
 *
 * Returns the number of characters up to and including the ')' that brings
 * the nesting depth back to zero.  If the string ends first, the length of
 * the whole string is returned.
 */
static int CommentLength(const char *str)
{
    int pos;
    int depth = 0;
    int escaped = 0;

    for (pos = 0; str[pos] != '\0'; ++pos) {
        if (escaped) {
            /* Second half of a quoted-pair: literal, whatever it is. */
            escaped = 0;
            continue;
        }
        switch (str[pos]) {
        case '\\':
            escaped = 1;
            break;
        case '(':
            ++depth;
            break;
        case ')':
            --depth;
            if (depth == 0) {
                return pos + 1;
            }
            break;
        default:
            break;
        }
    }
    /* Ran off the end of the string without closing the comment. */
    return pos;
}
/*
 * ParseRfc822Date() -- Parse a date in RFC-822 (RFC-1123) format
 *
 * Expected layout: [weekday ","] day month year hour ":" minute
 * [":" second] [zone].  Parenthesized comments between fields are skipped.
 *
 *   - day     one or two digits, 1..31 (an optional leading weekday name is
 *             skipped because the scan starts at the first digit)
 *   - month   three-letter English abbreviation, any letter case
 *   - year    up to four digits; values below 70 are taken as 20xx and
 *             values 70..99 as 19xx; only 1970..2037 is accepted
 *   - hour    0..23, minute 0..59, second 0..59 (second defaults to 0)
 *   - zone    "+hhmm" / "-hhmm", or one of UT, GMT, EST, EDT, CST, CDT,
 *             MST, MDT, PST, PDT, CET, CEST (any letter case).  A missing
 *             or unrecognized zone yields 0.
 *
 * If the parsing succeeds:
 *   - tms (if not NULL) receives year, month, day, hour, minute and second;
 *     no other member of *tms is written
 *   - z (if not NULL) receives the time zone in minutes offset from UTC
 *   - 0 is returned
 * If the parsing fails:
 *   - (-1) is returned
 *   - callers should treat tms and z as undefined; the current code stores
 *     1970-01-01 00:00:00 and zone 0 unless str is NULL, in which case
 *     nothing is written
 */

/* Nonzero if ch is an ASCII decimal digit. */
static int IsAsciiDigit(int ch)
{
    return '0' <= ch && ch <= '9';
}

/*
 * Nonzero if ch can begin a time zone: a sign or a letter.  The cast keeps
 * isalpha() well defined for bytes >= 0x80 when plain char is signed.
 */
static int IsZoneStart(int ch)
{
    return ch == '+' || ch == '-' || isalpha((unsigned char) ch);
}

/* Index after str[pos]: past a whole comment if one starts there, else +1. */
static int StepOver(const char *str, int pos)
{
    if (str[pos] == '(') {
        return pos + CommentLength(&str[pos]);
    }
    return pos + 1;
}

/* Advance pos to the next digit outside a comment, or to the end of str. */
static int SkipToDigit(const char *str, int pos)
{
    while (str[pos] && !IsAsciiDigit(str[pos])) {
        pos = StepOver(str, pos);
    }
    return pos;
}

/*
 * Advance pos to the next character that can begin a time zone, or to the
 * end of str.  If stopAtColon is nonzero the scan also stops at ':'.
 */
static int SkipToZone(const char *str, int pos, int stopAtColon)
{
    while (str[pos]
           && !(stopAtColon && str[pos] == ':')
           && !IsZoneStart(str[pos])) {
        pos = StepOver(str, pos);
    }
    return pos;
}

/*
 * Convert up to maxDigits consecutive digits starting at str[*pos] and move
 * *pos past them.  Returns the value, or -1 if str[*pos] is not a digit.
 */
static int ReadDigits(const char *str, int *pos, int maxDigits)
{
    int value = -1;
    int taken = 0;

    while (taken < maxDigits && IsAsciiDigit(str[*pos])) {
        int digit = str[*pos] - '0';
        value = (taken == 0) ? digit : value * 10 + digit;
        ++*pos;
        ++taken;
    }
    return value;
}

/*
 * Nonzero if s begins with the given name in any letter case.  upper and
 * lower hold the same name in upper and lower case.  The comparison stops
 * at the first mismatch, so it never reads past the terminating NUL of s.
 */
static int HasNamePrefix(const char *s, const char *upper, const char *lower)
{
    while (*upper) {
        if (*s != *upper && *s != *lower) {
            return 0;
        }
        ++s;
        ++upper;
        ++lower;
    }
    return 1;
}

/* Month index 0..11 if s begins with a month abbreviation, else -1. */
static int MonthFromName(const char *s)
{
    static const char upper[12][4] = {
        "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
        "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
    };
    static const char lower[12][4] = {
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec"
    };
    int m;

    for (m = 0; m < 12; ++m) {
        if (HasNamePrefix(s, upper[m], lower[m])) {
            return m;
        }
    }
    return -1;
}

/*
 * Unsigned value, in minutes, of the digits of a numeric zone.  s points
 * just after the sign; any non-digits before the number are skipped.  At
 * most the first four digits of the run are used.  A shorter run is
 * right-aligned in the "hhmm" pattern (three digits are read as "hmm", two
 * as "mm", one as "m").  No digits at all yields 0.
 */
static int ZoneFromDigits(const char *s)
{
    static const int weight[4] = { 600, 60, 10, 1 };
    int run = 0;
    int used, i;
    int minutes = 0;

    while (*s && !IsAsciiDigit(*s)) {
        ++s;
    }
    while (IsAsciiDigit(s[run])) {
        ++run;
    }
    used = (run < 4) ? run : 4;
    for (i = 0; i < used; ++i) {
        minutes += (s[i] - '0') * weight[4 - used + i];
    }
    return minutes;
}

/*
 * Zone value for an alphabetic zone starting at s.
 *
 * Note: According to RFC-1123, the military time zones are specified
 * incorrectly in RFC-822, and "military time zones in RFC-822 headers
 * carry no information."  Military time zones should *never* be used!
 *
 * NOTE(review): the single-letter military branch returns the raw RFC-822
 * hour number (-12..+12) without scaling it to minutes, and it is never
 * reached for the letters C, E, G, M, P and U, which are reserved for the
 * named zones.  This is kept exactly as it was; confirm whether such zones
 * should simply map to 0.
 *
 * Anything unrecognized yields 0: some software doesn't set the timezone,
 * so the parser is deliberately lenient here.
 */
static int ZoneFromName(const char *s)
{
    static const struct {
        const char *upper;
        const char *lower;
        int minutes;
    } named[] = {
        { "UT",   "ut",      0 },
        { "GMT",  "gmt",     0 },
        { "EST",  "est",  -300 },
        { "EDT",  "edt",  -240 },
        { "CST",  "cst",  -360 },
        { "CDT",  "cdt",  -300 },
        { "CET",  "cet",    60 },   /* non-RFC822 */
        { "CEST", "cest",  120 },   /* non-RFC822 */
        { "MST",  "mst",  -420 },
        { "MDT",  "mdt",  -360 },
        { "PST",  "pst",  -480 },
        { "PDT",  "pdt",  -420 }
    };
    const int count = (int) (sizeof(named) / sizeof(named[0]));
    int i;
    int ch = s[0];

    for (i = 0; i < count; ++i) {
        if (HasNamePrefix(s, named[i].upper, named[i].lower)) {
            return named[i].minutes;
        }
    }

    /* Initials of the named zones never fall through to the military rule. */
    switch (ch) {
    case 'C':
    case 'E':
    case 'G':
    case 'M':
    case 'P':
    case 'U':
        return 0;
    default:
        break;
    }

    /* Military time zone (upper case only; 'J' is unused, 'Z' is UTC). */
    if ('A' <= ch && ch <= 'I') {
        return 'A' - 1 - ch;
    }
    if ('K' <= ch && ch <= 'M') {
        return 'A' - ch;
    }
    if ('N' <= ch && ch <= 'Y') {
        return ch - 'N' + 1;
    }
    return 0;
}

/* Store the fallback result (1970-01-01 00:00:00, zone 0) and return -1. */
static int RejectDate(struct tm *tms, int *z)
{
    if (tms) {
        tms->tm_year = 70;
        tms->tm_mon = 0;
        tms->tm_mday = 1;
        tms->tm_hour = 0;
        tms->tm_min = 0;
        tms->tm_sec = 0;
    }
    if (z) {
        *z = 0;
    }
    return -1;
}

#ifdef __cplusplus
extern "C"
#endif
int ParseRfc822Date(const char *str, struct tm *tms, int *z)
{
    int pos, day, month, year, hour, minute, second, zone;

    if (!str) {
        return -1;
    }

    /*
     * Day -- one or two digits.  Starting at the first digit also skips
     * the optional day of the week.
     */
    pos = SkipToDigit(str, 0);
    day = ReadDigits(str, &pos, 2);
    if (day < 1 || day > 31) {
        return RejectDate(tms, z);
    }

    /*
     * Month.  Skip to the first character that could begin a month name
     * (a letter in A..S or a..s), then compare case-insensitively.
     */
    while (str[pos]
           && !(('A' <= str[pos] && str[pos] <= 'S')
                || ('a' <= str[pos] && str[pos] <= 's'))) {
        pos = StepOver(str, pos);
    }
    month = MonthFromName(&str[pos]);
    if (month < 0) {
        return RejectDate(tms, z);
    }
    pos += 3;

    /*
     * Year -- two or four digits (four preferred)
     */
    pos = SkipToDigit(str, pos);
    year = ReadDigits(str, &pos, 4);
    if (year < 0) {
        return RejectDate(tms, z);
    }
    if (year < 70) {
        year += 2000;   /* When less than 70 assume after year 2000 */
    }
    else if (year <= 99) {
        year += 1900;   /* When >69 and <100 assume 1970 to 1999 */
    }
    /* Limit the valid range to 1970 to 2037 */
    if (year < 1970 || year >= 2038) {
        return RejectDate(tms, z);
    }

    /*
     * Hour -- one or two digits
     */
    pos = SkipToDigit(str, pos);
    hour = ReadDigits(str, &pos, 2);
    if (hour < 0 || hour > 23) {
        return RejectDate(tms, z);
    }

    /*
     * Minute -- one or two digits after the next ':'
     */
    while (str[pos] && str[pos] != ':') {
        pos = StepOver(str, pos);
    }
    pos = SkipToDigit(str, pos);
    minute = ReadDigits(str, &pos, 2);
    if (minute < 0 || minute > 59) {
        return RejectDate(tms, z);
    }

    /*
     * Second (optional) -- present only if a ':' comes before the zone
     */
    second = 0;
    pos = SkipToZone(str, pos, 1);
    if (str[pos] == ':') {
        pos = SkipToDigit(str, pos + 1);
        second = ReadDigits(str, &pos, 2);
        if (second < 0 || second > 59) {
            return RejectDate(tms, z);
        }
        pos = SkipToZone(str, pos, 0);
    }

    /*
     * Time zone
     */
    if (str[pos] == '+') {
        zone = ZoneFromDigits(&str[pos + 1]);
    }
    else if (str[pos] == '-') {
        zone = -ZoneFromDigits(&str[pos + 1]);
    }
    else {
        zone = ZoneFromName(&str[pos]);
    }

    if (tms) {
        tms->tm_year = year - 1900;
        tms->tm_mon = month;
        tms->tm_mday = day;
        tms->tm_hour = hour;
        tms->tm_min = minute;
        tms->tm_sec = second;
    }
    if (z) {
        *z = zone;
    }
    return 0;
}
/* Three-letter English weekday abbreviations; index 0 is Sunday. */
const char* wdays[7] = {
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat"
};
/* Three-letter English month abbreviations; index 0 is January. */
const char* months[12] = {
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec"
};
#ifdef DW_TESTING_DATEPARSER
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
const char* testStr[] = {
""
};
/*
 * Round-trip self-test, built only when DW_TESTING_DATEPARSER is defined.
 *
 * Formats 1000 pseudo-random timestamps (fixed seed, so runs are
 * repeatable) with a random zone that is a multiple of 30 minutes in
 * -720..+720, parses each string with ParseRfc822Date() and reports every
 * field that does not survive the round trip on stderr.  Always returns 0.
 */
int main()
{
    struct tm *ptms, tms1, tms2;
    time_t tt;
    int i, zone1, zone2;
    char buf[100], sgn;
    /* try a bunch of random dates */
    srand(100);
    for (i=0; i < 1000; ++i) {
        tt = (time_t) (rand()*((double)0x7fffffff/RAND_MAX));
        zone1 = (rand()%49 - 24)*30;
        /* gmtime() takes only the time value and returns the broken-down time */
        ptms = gmtime(&tt);
        if (!ptms) {
            continue;
        }
        tms1 = *ptms;
        sgn = (zone1 >= 0) ? '+' : '-';
        snprintf(buf, sizeof(buf), "%s, %2d %s %d %d%d:%d%d:%d%d %c%d%d%d%d",
            wdays[tms1.tm_wday], tms1.tm_mday, months[tms1.tm_mon],
            tms1.tm_year+1900,
            tms1.tm_hour/10, tms1.tm_hour%10,
            tms1.tm_min/10, tms1.tm_min%10,
            tms1.tm_sec/10, tms1.tm_sec%10,
            sgn, abs(zone1)/60/10, abs(zone1)/60%10,
            abs(zone1)%60/10, abs(zone1)%60%10);
        if (ParseRfc822Date(buf, &tms2, &zone2) != 0) {
            fprintf(stderr, "Parse failed: %s\n", buf);
            continue;
        }
        if (tms1.tm_year != tms2.tm_year) {
            fprintf(stderr, "Bad year\n");
        }
        if (tms1.tm_mon != tms2.tm_mon) {
            fprintf(stderr, "Bad month\n");
        }
        if (tms1.tm_mday != tms2.tm_mday) {
            fprintf(stderr, "Bad day\n");
        }
        if (tms1.tm_hour != tms2.tm_hour) {
            fprintf(stderr, "Bad hour\n");
        }
        if (tms1.tm_min != tms2.tm_min) {
            fprintf(stderr, "Bad minute\n");
        }
        if (tms1.tm_sec != tms2.tm_sec) {
            fprintf(stderr, "Bad second\n");
        }
        if (zone1 != zone2) {
            fprintf(stderr, "Bad zone\n");
        }
    }
    return 0;
}
#endif
// Try to parse a date/time string whose format does not conform to
// RFC 822.
// Here we detect the following format:
// "WWW MMM dd HH:MM:SS [Z] YYYY" zone is optional
// e.g.: Fri Oct 14 09:21:49 CEST 2005
// or: Tue Mar 23 18:00:02 2004
// also: Tue, Feb 04, 2003 00:01:20 +0000
#include <string.h>
#include <stdio.h>
// ParseDate() -- parse two common non-RFC-822 date layouts
//
// Accepted input (at least 24 characters, starting with a three-letter
// English weekday name, optionally followed by ","):
//   "WWW, MMM dd, YYYY HH:MM:SS +ZZZZ"   (numeric zone required)
//   "WWW MMM dd HH:MM:SS [Z] YYYY"       (ctime() style, zone name optional)
// In the second layout the time, zone and year are read from fixed
// character positions.  Known zone names are EST, EDT, CST, CDT, MST, MDT,
// PST, PDT, CET and CEST (upper case only); any other name counts as
// offset 0.
//
// On success returns 0, stores year/month/day/hour/minute/second in *tms
// (if tms is not NULL; other members are left untouched) and the zone
// offset in minutes in *z (if z is not NULL).
// On failure returns -1 and writes nothing.
#ifdef __cplusplus
extern "C"
#endif
int ParseDate(const char *str, struct tm *tms, int *z)
{
    static const struct {
        const char *name;
        int minutes;
    } zoneNames[] = {
        { "EST",  -5 * 60 },
        { "EDT",  -4 * 60 },
        { "CST",  -6 * 60 },
        { "CDT",  -5 * 60 },
        { "MST",  -7 * 60 },
        { "MDT",  -6 * 60 },
        { "PST",  -8 * 60 },
        { "PDT",  -7 * 60 },
        { "CET",   1 * 60 },
        { "CEST",  2 * 60 }
    };
    int day = 1, month = 0, year = 1970, hour = 0, minute = 0, second = 0, zone = 0;
    int i, offset, h, m;
    char sign;
    size_t k;

    if ( !str ) {
        return -1;
    }
    if ( strlen(str) < 24 ) {  // at least "WWW MMM dd HH:MM:SS YYYY"
        return -1;
    }

    // the string must start with a week day name
    for (i = 0; i < 7; i++) {
        if ( strncmp(str, wdays[i], 3) == 0 ) {
            break;
        }
    }
    if ( i == 7 ) {
        return -1;
    }

    // the month name follows; allow the week day to be terminated with ","
    offset = (str[3] == ',') ? 5 : 4;
    for (i = 0; i < 12; i++) {
        if ( strncmp(str+offset, months[i], 3) == 0 ) {
            break;
        }
    }
    if ( i == 12 ) {
        return -1;
    }
    month = i;

    // try "dd, YYYY HH:MM:SS +ZZZZ"
    if ( sscanf(str+offset+4, "%d, %d %d:%d:%d %c%2d%2d",
                &day, &year, &hour, &minute, &second, &sign, &h, &m) == 8 ) {
        // ok, worked, calculate zone
        zone = h * 60 + m;
        if ( sign == '-' ) {
            zone = -zone;
        }
    }
    else {
        // try "dd HH:MM:SS"
        if ( sscanf(str+8, "%d %d:%d:%d", &day, &hour, &minute, &second) != 4 ) {
            return -1;
        }
        // cast: isdigit() is undefined for negative values of plain char
        if ( isdigit((unsigned char) str[20]) ) {  // year without zone info, as in ctime()
            if ( sscanf(str+20, "%d", &year) != 1 ) {
                return -1;
            }
        }
        else {
            // a zone name sits between the time and the year
            if ( sscanf(str+20, "%*s %d", &year) != 1 ) {
                return -1;
            }
            for (k = 0; k < sizeof(zoneNames) / sizeof(zoneNames[0]); k++) {
                if ( strncmp(str+20, zoneNames[k].name, strlen(zoneNames[k].name)) == 0 ) {
                    zone = zoneNames[k].minutes;
                    break;
                }
            }
        }
    }

    if ( (day < 1) || (day > 31) ||
         (hour < 0) || (hour > 23) ||
         (minute < 0) || (minute > 59) ||
         (second < 0) || (second > 59) ||
         (year < 1900) ) {
        return -1;
    }

    if ( tms ) {
        tms->tm_year = year - 1900;
        tms->tm_mon = month;
        tms->tm_mday = day;
        tms->tm_hour = hour;
        tms->tm_min = minute;
        tms->tm_sec = second;
    }
    if ( z ) {
        *z = zone;
    }
    return 0;
}