You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
koffice/kspread/valueparser.cpp

632 lines
15 KiB

/* This file is part of the KDE project
Copyright 2004 Tomas Mecir <mecirt@gmail.com>
Copyright (C) 1998, 1999 Torben Weis <weis@kde.org>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "valueparser.h"
#include "kspread_cell.h"
#include "kspread_format.h"
#include "kspread_locale.h"
#include "kspread_value.h"
using namespace KSpread;
ValueParser::ValueParser( TDELocale* locale ) : parserLocale( locale )
{
}
TDELocale* ValueParser::locale()
{
return parserLocale;
}
void ValueParser::parse (const TQString& str, Cell *cell)
{
FormatType format = cell->formatType();
// If the text is empty, we don't have a value
// If the user stated explicitly that he wanted text
// (using the format or using a quote),
// then we don't parse as a value, but as string.
if ( str.isEmpty() || format == Text_format || str.at(0)=='\'' )
{
cell->setValue (str);
return;
}
TQString strStripped = str.stripWhiteSpace();
// Try parsing as various datatypes, to find the type of the cell
// First as number
if (tryParseNumber (strStripped, cell))
return;
// Then as bool
if (tryParseBool (strStripped, cell))
return;
// Test for money number
bool ok;
double money = parserLocale->readMoney (strStripped, &ok);
if (ok)
{
cell->format()->setPrecision(2);
Value val (money);
val.setFormat (Value::fmt_Money);
cell->setValue (val);
return;
}
if (tryParseDate (strStripped, cell))
return;
if (tryParseTime (strStripped, cell))
return;
// Nothing particular found, then this is simply a string
cell->setValue (Value (str));
}
Value ValueParser::parse (const TQString &str)
{
Value val;
// If the text is empty, we don't have a value
// If the user stated explicitly that he wanted text
// (using the format or using a quote),
// then we don't parse as a value, but as string.
if ( str.isEmpty() || str.at(0)=='\'' )
{
val.setValue (str);
return val;
}
bool ok;
TQString strStripped = str.stripWhiteSpace();
// Try parsing as various datatypes, to find the type of the string
// First as number
val = tryParseNumber (strStripped, &ok);
if (ok)
return val;
// Then as bool
// Note - I swapped the order of these two to try parsing as a number
// first because that will probably be the most common case
val = tryParseBool (strStripped, &ok);
if (ok)
return val;
// Test for money number
double money = parserLocale->readMoney (strStripped, &ok);
if (ok)
{
val.setValue (money);
val.setFormat (Value::fmt_Money);
return val;
}
val = tryParseDate (strStripped, &ok);
if (ok)
return val;
val = tryParseTime (strStripped, &ok);
if (ok)
return val;
// Nothing particular found, then this is simply a string
val.setValue (str);
return val;
}
bool ValueParser::tryParseBool (const TQString& str, Cell *cell)
{
bool ok;
Value val = tryParseBool (str, &ok);
if (ok)
cell->setValue (val);
return ok;
}
bool ValueParser::tryParseNumber (const TQString& str, Cell *cell)
{
bool ok;
Value val = tryParseNumber (str, &ok);
if (ok)
cell->setValue (val);
return ok;
}
bool ValueParser::tryParseDate (const TQString& str, Cell *cell)
{
bool ok;
Value value = tryParseDate (str, &ok);
if (ok)
cell->setValue (value);
return ok;
}
bool ValueParser::tryParseTime (const TQString& str, Cell *cell)
{
bool ok;
Value value = tryParseTime (str, &ok);
if (ok)
cell->setValue (value);
return ok;
}
Value ValueParser::tryParseBool (const TQString& str, bool *ok)
{
Value val;
if (ok) *ok = false;
const TQString& lowerStr = str.lower();
if ((lowerStr == "true") ||
(lowerStr == parserLocale->translate("true").lower()))
{
val.setValue (true);
if (ok) *ok = true;
}
else if ((lowerStr == "false") ||
(lowerStr == parserLocale->translate("false").lower()))
{
val.setValue (false);
if (ok) *ok = true;
fmtType = Number_format; //TODO: really?
}
return val;
}
double ValueParser::readNumber(const TQString &_str, bool * ok, bool * isInt)
{
TQString str = _str.stripWhiteSpace();
bool neg = str.find(parserLocale->negativeSign()) == 0;
if (neg)
str.remove( 0, parserLocale->negativeSign().length() );
/* will hold the scientific notation portion of the number.
Example, with 2.34E+23, exponentialPart == "E+23"
*/
TQString exponentialPart;
int EPos;
EPos = str.find('E', 0, false);
if (EPos != -1)
{
exponentialPart = str.mid(EPos);
str = str.left(EPos);
}
int pos = str.find(parserLocale->decimalSymbol());
TQString major;
TQString minor;
if ( pos == -1 )
{
major = str;
if (isInt) *isInt = true;
}
else
{
major = str.left(pos);
minor = str.mid(pos + parserLocale->decimalSymbol().length());
if (isInt) *isInt = false;
}
// Remove thousand separators
int thlen = parserLocale->thousandsSeparator().length();
int lastpos = 0;
while ( ( pos = major.find( parserLocale->thousandsSeparator() ) ) > 0 )
{
// e.g. 12,,345,,678,,922 Acceptable positions (from the end) are 5, 10, 15... i.e. (3+thlen)*N
int fromEnd = major.length() - pos;
if ( fromEnd % (3+thlen) != 0 // Needs to be a multiple, otherwise it's an error
|| pos - lastpos > 3 // More than 3 digits between two separators -> error
|| pos == 0 // Can't start with a separator
|| (lastpos>0 && pos-lastpos!=3)) // Must have exactly 3 digits between two separators
{
if (ok) *ok = false;
return 0.0;
}
lastpos = pos;
major.remove( pos, thlen );
}
if (lastpos>0 && major.length()-lastpos!=3) // Must have exactly 3 digits after the last separator
{
if (ok) *ok = false;
return 0.0;
}
TQString tot;
if (neg) tot = '-';
tot += major + '.' + minor + exponentialPart;
return tot.toDouble(ok);
}
Value ValueParser::tryParseNumber (const TQString& str, bool *ok)
{
Value value;
bool percent = false;
TQString str2;
if( str.at(str.length()-1)=='%')
{
str2 = str.left (str.length()-1).stripWhiteSpace();
percent = true;
}
else
str2 = str;
// First try to understand the number using the parserLocale
bool isInt;
double val = readNumber (str2, ok, &isInt);
// If not, try with the '.' as decimal separator
if (!(*ok))
{
val = str2.toDouble(ok);
if (str.contains('.'))
isInt = false;
else
isInt = true;
}
if (*ok)
{
if (percent)
{
//kdDebug(36001) << "ValueParser::tryParseNumber '" << str <<
// "' successfully parsed as percentage: " << val << "%" << endl;
value.setValue (val / 100.0);
value.setFormat (Value::fmt_Percent);
fmtType = Percentage_format;
}
else
{
//kdDebug(36001) << "ValueParser::tryParseNumber '" << str <<
// "' successfully parsed as number: " << val << endl;
if (isInt)
value.setValue (static_cast<long> (val));
else
value.setValue (val);
if ( str2.contains('E') || str2.contains('e') )
fmtType = Scientific_format;
else
{
if (val > 1e+10)
fmtType = Scientific_format;
else
fmtType = Number_format;
}
}
}
return value;
}
Value ValueParser::tryParseDate (const TQString& str, bool *ok)
{
bool valid = false;
TQDate tmpDate = parserLocale->readDate (str, &valid);
if (!valid)
{
// Try without the year
// The tricky part is that we need to remove any separator around the year
// For instance %Y-%m-%d becomes %m-%d and %d/%m/%Y becomes %d/%m
// If the year is in the middle, say %m-%Y/%d, we'll remove the sep.
// before it (%m/%d).
TQString fmt = parserLocale->dateFormatShort();
int yearPos = fmt.find ("%Y", 0, false);
if ( yearPos > -1 )
{
if ( yearPos == 0 )
{
fmt.remove( 0, 2 );
while ( fmt[0] != '%' )
fmt.remove( 0, 1 );
} else
{
fmt.remove( yearPos, 2 );
for ( ; yearPos > 0 && fmt[yearPos-1] != '%'; --yearPos )
fmt.remove( yearPos, 1 );
}
//kdDebug(36001) << "Cell::tryParseDate short format w/o date: " << fmt << endl;
tmpDate = parserLocale->readDate( str, fmt, &valid );
}
}
if (valid)
{
// Note: if shortdate format only specifies 2 digits year, then 3/4/1955
// will be treated as in year 3055, while 3/4/55 as year 2055
// (because 55 < 69, see TDELocale) and thus there's no way to enter for
// year 1995
// The following fixes the problem, 3/4/1955 will always be 1955
TQString fmt = parserLocale->dateFormatShort();
if( ( fmt.contains( "%y" ) == 1 ) && ( tmpDate.year() > 2999 ) )
tmpDate = tmpDate.addYears( -1900 );
// this is another HACK !
// with two digit years, 0-69 is treated as year 2000-2069 (see TDELocale)
// however, in Excel only 0-29 is year 2000-2029, 30 or later is 1930
// onwards
// the following provides workaround for TDELocale so we're compatible
// with Excel
// (e.g 3/4/45 is Mar 4, 1945 not Mar 4, 2045)
if( ( tmpDate.year() >= 2030 ) && ( tmpDate.year() <= 2069 ) )
{
TQString yearFourDigits = TQString::number( tmpDate.year() );
TQString yearTwoDigits = TQString::number( tmpDate.year() % 100 );
// if year is 2045, check to see if "2045" isn't there --> actual
// input is "45"
if( ( str.contains( yearTwoDigits ) >= 1 ) &&
( str.contains( yearFourDigits ) == 0 ) )
tmpDate = tmpDate.addYears( -100 );
}
//test if it's a short date or text date.
if (parserLocale->formatDate (tmpDate, false) == str)
fmtType = TextDate_format;
else
fmtType = ShortDate_format;
}
if (!valid)
{
//try to use the standard TQt date parsing, using ISO 8601 format
tmpDate = TQDate::fromString(str,Qt::ISODate);
if (tmpDate.isValid())
{
valid = true;
}
}
if (ok)
*ok = valid;
return Value (tmpDate);
}
Value ValueParser::tryParseTime (const TQString& str, bool *ok)
{
if (ok)
*ok = false;
bool valid = false;
bool duration = false;
Value val;
TQDateTime tmpTime = readTime (str, true, &valid, duration);
if (!tmpTime.isValid())
tmpTime = readTime (str, false, &valid, duration);
if (!valid)
{
TQTime tm;
if (parserLocale->use12Clock())
{
TQString stringPm = parserLocale->translate("pm");
TQString stringAm = parserLocale->translate("am");
int pos=0;
if((pos=str.find(stringPm))!=-1)
{
TQString tmp=str.mid(0,str.length()-stringPm.length());
tmp=tmp.simplifyWhiteSpace();
tm = parserLocale->readTime(tmp+" "+stringPm, &valid);
if (!valid)
tm = parserLocale->readTime(tmp+":00 "+stringPm, &valid);
}
else if((pos=str.find(stringAm))!=-1)
{
TQString tmp = str.mid(0,str.length()-stringAm.length());
tmp = tmp.simplifyWhiteSpace();
tm = parserLocale->readTime (tmp + " " + stringAm, &valid);
if (!valid)
tm = parserLocale->readTime (tmp + ":00 " + stringAm, &valid);
}
if (valid)
tmpTime.setTime(tm);
}
}
if (valid)
{
fmtType = Time_format;
if ( duration )
{
val.setValue (tmpTime);
fmtType = Time_format7;
}
else
val.setValue (tmpTime.time());
}
if (ok)
*ok = valid;
return val;
}
TQDateTime ValueParser::readTime (const TQString & intstr, bool withSeconds,
bool *ok, bool & duration)
{
duration = false;
TQString str = intstr.simplifyWhiteSpace().lower();
TQString format = parserLocale->timeFormat().simplifyWhiteSpace();
if ( !withSeconds )
{
int n = format.find("%S");
format = format.left( n - 1 );
}
int days = -1;
int hour = -1, minute = -1;
int second = withSeconds ? -1 : 0; // don't require seconds
bool g_12h = false;
bool pm = false;
uint strpos = 0;
uint formatpos = 0;
TQDate refDate( 1899, 12, 31 );
uint l = format.length();
uint sl = str.length();
while (l > formatpos || sl > strpos)
{
if ( !(l > formatpos && sl > strpos) )
goto error;
TQChar c( format.at( formatpos++ ) );
if (c != '%')
{
if (c.isSpace())
++strpos;
else if (c != str.at(strpos++))
goto error;
continue;
}
// remove space at the begining
if (sl > strpos && str.at( strpos).isSpace() )
++strpos;
c = format.at( formatpos++ );
switch (c)
{
case 'p':
{
TQString s;
s = parserLocale->translate("pm").lower();
int len = s.length();
if (str.mid(strpos, len) == s)
{
pm = true;
strpos += len;
}
else
{
s = parserLocale->translate("am").lower();
len = s.length();
if (str.mid(strpos, len) == s)
{
pm = false;
strpos += len;
}
else
goto error;
}
}
break;
case 'k':
case 'H':
g_12h = false;
hour = readInt(str, strpos);
if (hour < 0)
goto error;
if (hour > 23)
{
days = (int)(hour / 24);
hour %= 24;
}
break;
case 'l':
case 'I':
g_12h = true;
hour = readInt(str, strpos);
if (hour < 1 || hour > 12)
goto error;
break;
case 'M':
minute = readInt(str, strpos);
if (minute < 0 || minute > 59)
goto error;
break;
case 'S':
second = readInt(str, strpos);
if (second < 0 || second > 59)
goto error;
break;
}
}
if (g_12h)
{
hour %= 12;
if (pm) hour += 12;
}
if (days > 0)
{
refDate.addDays( days );
duration = true;
}
if (ok)
*ok = true;
return TQDateTime( refDate, TQTime( hour, minute, second ) );
error:
if (ok)
*ok = false;
// return invalid date if it didn't work
return TQDateTime( refDate, TQTime( -1, -1, -1 ) );
}
/**
* helper function to read integers, used in readTime
* @param str
* @param pos the position to start at. It will be updated when we parse it.
* @return the integer read in the string, or -1 if no string
*/
int ValueParser::readInt (const TQString &str, uint &pos)
{
if (!str.at(pos).isDigit())
return -1;
int result = 0;
for ( ; str.length() > pos && str.at(pos).isDigit(); pos++ )
{
result *= 10;
result += str.at(pos).digitValue();
}
return result;
}