You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdegraphics/kviewshell/plugins/djvu/libdjvu/GString.cpp

2812 lines
60 KiB

//C- -*- C++ -*-
//C- -------------------------------------------------------------------
//C- DjVuLibre-3.5
//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
//C- Copyright (c) 2001 AT&T
//C-
//C- This software is subject to, and may be distributed under, the
//C- GNU General Public License, Version 2. The license should have
//C- accompanied the software or you may obtain a copy of the license
//C- from the Free Software Foundation at http://www.fsf.org .
//C-
//C- This program is distributed in the hope that it will be useful,
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//C- GNU General Public License for more details.
//C-
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech
//C- Software authorized us to replace the original DjVu(r) Reference
//C- Library notice by the following text (see doc/lizard2002.djvu):
//C-
//C- ------------------------------------------------------------------
//C- | DjVu (r) Reference Library (v. 3.5)
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
//C- | 6,058,214 and patents pending.
//C- |
//C- | This software is subject to, and may be distributed under, the
//C- | GNU General Public License, Version 2. The license should have
//C- | accompanied the software or you may obtain a copy of the license
//C- | from the Free Software Foundation at http://www.fsf.org .
//C- |
//C- | The computer code originally released by LizardTech under this
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
//C- | ORIGINAL CODE." Subject to any third party intellectual property
//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
//C- | non-exclusive license to make, use, sell, or otherwise dispose of
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
//C- | General Public License. This grant only confers the right to
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
//C- | the extent such infringement is reasonably necessary to enable
//C- | recipient to make, have made, practice, sell, or otherwise dispose
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
//C- | any greater extent that may be necessary to utilize further
//C- | modifications or combinations.
//C- |
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
//C- +------------------------------------------------------------------
//
// $Id: GString.cpp,v 1.22 2005/04/27 16:34:13 leonb Exp $
// $Name: release_3_5_15 $
// From: Leon Bottou, 1/31/2002
// This file has very little to do with my initial implementation.
// It has been practically rewritten by Lizardtech for i18n changes.
// My original implementation was very small in comparison
// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
// In my opinion, the duplication of the string classes is a failed
// attempt to use the type system to enforce coding policies.
// This could be fixed. But there are better things to do in djvulibre.
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#if NEED_GNUG_PRAGMAS
# pragma implementation
#endif
#include "GString.h"
#include "GThreads.h"
#include "debug.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if HAS_WCHAR
# include <locale.h>
# if !defined(AUTOCONF) || HAVE_WCHAR_H
# include <wchar.h>
# endif
# if HAS_WCTYPE
# include <wctype.h>
# endif
#endif
#include <ctype.h>
#ifndef DO_CHANGELOCALE
#define DO_CHANGELOCALE 1
#ifdef UNIX
#if THREADMODEL != COTHREADS
#if THREADMODEL != NOTHREADS
#undef DO_CHANGELOCALE
#define DO_CHANGELOCALE 0
#endif
#endif
#endif
#endif
#ifdef HAVE_NAMESPACES
namespace DJVU {
# ifdef NOT_DEFINED // Just to fool emacs c++ mode
}
#endif
#endif
// Out-of-line, empty destructors of the three string classes.
GBaseString::~GBaseString()
{
}

GNativeString::~GNativeString()
{
}

GUTF8String::~GUTF8String()
{
}
#if !HAS_MBSTATE && HAS_WCHAR
// Under some systems, wctomb() and mbtowc() are not thread
// safe. In those cases, wcrtomb and mbrtowc are preferred.
// For Solaris, wctomb() and mbtowc() are thread safe, and
// wcrtomb() and mbrtowc() don't exist.
//
// When this section is compiled, the three restartable names are
// redirected by macro to the local shims below. Each shim ignores its
// mbstate_t argument and forwards to the non-restartable C function.
#define wcrtomb MYwcrtomb
#define mbrtowc MYmbrtowc
#define mbrlen MYmbrlen
// Shim for wcrtomb(): forwards to wctomb(bytes,w).
static inline int
wcrtomb(char *bytes,wchar_t w,mbstate_t *)
{
return wctomb(bytes,w);
}
// Shim for mbrtowc(): forwards to mbtowc(w,source,n).
static inline int
mbrtowc(wchar_t *w,const char *source, size_t n, mbstate_t *)
{
return mbtowc(w,source,n);
}
// Shim for mbrlen(): forwards to mblen(s,n).
static inline size_t
mbrlen(const char *s, size_t n, mbstate_t *)
{
return mblen(s,n);
}
#endif // !HAS_MBSTATE && HAS_WCHAR
// Upper-case conversion: runs tocase() with the giswupper predicate and
// the gtowupper character mapping.
GP<GStringRep>
GStringRep::upcase(void) const
{
  return tocase(giswupper, gtowupper);
}
// Lower-case conversion: runs tocase() with the giswlower predicate and
// the gtowlower character mapping.
GP<GStringRep>
GStringRep::downcase(void) const
{
  return tocase(giswlower, gtowlower);
}
// Allocate a UTF8 representation with room for #sz# characters.
// The null pointer argument only selects the concrete type to build.
GP<GStringRep>
GStringRep::UTF8::create(const unsigned int sz)
{
  GStringRep::UTF8 * const type_selector = 0;
  return GStringRep::create(sz, type_selector);
}
// Create a UTF8 representation holding a copy of the C string #s#.
GP<GStringRep>
GStringRep::UTF8::create(const char *s)
{
  // A temporary instance provides the UTF8 flavour of strdup().
  GStringRep::UTF8 prototype;
  return prototype.strdup(s);
}
// Create a UTF8 representation by concatenating #s1# and #s2#.
GP<GStringRep>
GStringRep::UTF8::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
{
  // A temporary instance provides the UTF8 flavour of concat().
  GStringRep::UTF8 prototype;
  return prototype.concat(s1, s2);
}
// Create a UTF8 representation by concatenating the representation #s1#
// with the C string #s2#.
GP<GStringRep>
GStringRep::UTF8::create( const GP<GStringRep> &s1,const char *s2)
{
  GStringRep::UTF8 prototype;
  return prototype.concat(s1, s2);
}
// Create a UTF8 representation by concatenating the C string #s1#
// with the representation #s2#.
GP<GStringRep>
GStringRep::UTF8::create( const char *s1, const GP<GStringRep> &s2)
{
  GStringRep::UTF8 prototype;
  return prototype.concat(s1, s2);
}
// Create a UTF8 representation by concatenating two C strings.
GP<GStringRep>
GStringRep::UTF8::create( const char *s1,const char *s2)
{
  GStringRep::UTF8 prototype;
  return prototype.concat(s1, s2);
}
// Create a UTF8 representation from a portion of the C string #s#,
// selected by #start# and #length# as interpreted by substr().
GP<GStringRep>
GStringRep::UTF8::create(const char *s,const int start,const int length)
{
  GStringRep::UTF8 prototype;
  return prototype.substr(s, start, length);
}
// Create a UTF8 representation from an array of 16 bit units,
// delegating the conversion to substr().
GP<GStringRep>
GStringRep::UTF8::create(
  const unsigned short *s,const int start,const int length)
{
  GStringRep::UTF8 prototype;
  return prototype.substr(s, start, length);
}
// Create a UTF8 representation from an array of unsigned long
// character codes, delegating the conversion to substr().
GP<GStringRep>
GStringRep::UTF8::create(
  const unsigned long *s,const int start,const int length)
{
  GStringRep::UTF8 prototype;
  return prototype.substr(s, start, length);
}
// Virtual factory: allocate a UTF8 representation of #sz# characters.
GP<GStringRep>
GStringRep::UTF8::blank(const unsigned int sz) const
{
  GStringRep::UTF8 * const type_selector = 0;
  return GStringRep::create(sz, type_selector);
}
// A UTF8 representation always reports itself as UTF8 encoded.
bool
GStringRep::UTF8::isUTF8(void) const
{
  const bool utf8_encoded = true;
  return utf8_encoded;
}
// Convert #rep# to the type of this object (UTF8).
// A null #rep# is returned unchanged; the second argument is unused.
GP<GStringRep>
GStringRep::UTF8::toThis(
  const GP<GStringRep> &rep,const GP<GStringRep> &) const
{
  if(rep)
  {
    return rep->toUTF8(true);
  }
  return rep;
}
// Create a UTF8 representation by formatting #args# according to #fmt#.
// The format is first copied into a representation, which then performs
// the formatting; a null format representation is returned as is.
GP<GStringRep>
GStringRep::UTF8::create(const char fmt[],va_list& args)
{
  const GP<GStringRep> format_rep(create(fmt));
  if(format_rep)
  {
    return format_rep->vformat(args);
  }
  return format_rep;
}
#if !HAS_WCHAR
#define NATIVE_CREATE(x) UTF8::create( x );
#ifdef LC_ALL
#undef LC_ALL
#endif
#define LC_ALL 0
// Placeholder used when wide characters are unavailable:
// construction and destruction do nothing.
class GStringRep::ChangeLocale
{
public:
  ChangeLocale(const int, const char *) { }
  ~ChangeLocale() { }
};
// Without wide character support the native string is simply
// copied into a UTF8 representation.
GP<GStringRep>
GStringRep::NativeToUTF8( const char *s )
{
  const GP<GStringRep> copy(GStringRep::UTF8::create(s));
  return copy;
}
#else
#define NATIVE_CREATE(x) Native::create( x );
// Declaration of GStringRep::ChangeLocale (not used in WinCE).
// The constructor receives a locale category and a locale name;
// the object stores a locale name and the category for its destructor.
class GStringRep::ChangeLocale
{
public:
  ChangeLocale(const int category, const char *locale);
  ~ChangeLocale();

private:
  GUTF8String locale;   // locale name kept for the destructor
  int category;         // category given to the constructor
};
// String representation whose bytes are encoded in the native
// (current locale) multibyte encoding.
class GStringRep::Native : public GStringRep
{
public:
// default constructor
Native(void);
// virtual destructor
virtual ~Native();
// Other virtual methods.
// Create an empty string.
virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
// Append a string.
virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
// Test if Native.
virtual bool isNative(void) const;
// Convert to Native.
virtual GP<GStringRep> toNative(
const EscapeMode escape=UNKNOWN_ESCAPED) const;
// Convert to UTF8.
virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
// Convert #rep# to the type of this object (Native).
virtual GP<GStringRep> toThis(
const GP<GStringRep> &rep,const GP<GStringRep> &) const;
// Compare with #s2#.
virtual int cmp(const GP<GStringRep> &s2, const int len=(-1)) const;
// Convert strings to numbers.
virtual int toInt(void) const;
virtual long toLong(
const int pos, int &endpos, const int base=10) const;
virtual unsigned long toULong(
const int pos, int &endpos, const int base=10) const;
virtual double toDouble(
const int pos, int &endpos) const;
// Create an empty string
static GP<GStringRep> create(const unsigned int sz = 0);
// Create a strdup string.
static GP<GStringRep> create(const char *s);
// Creates with a concat operation.
static GP<GStringRep> create(
const GP<GStringRep> &s1,const GP<GStringRep> &s2);
static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
static GP<GStringRep> create(const char *s1,const char *s2);
// Create with a strdup and substr operation.
static GP<GStringRep> create(
const char *s,const int start,const int length=(-1));
static GP<GStringRep> create(
const unsigned short *s,const int start,const int length=(-1));
static GP<GStringRep> create(
const unsigned long *s,const int start,const int length=(-1));
// Create with an sprintf()
static GP<GStringRep> create_format(const char fmt[],...);
static GP<GStringRep> create(const char fmt[],va_list &args);
// Write the native multibyte encoding of the character #w# at #ptr#.
virtual unsigned char *UCS4toString(
const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const;
// Tests if a string is legally encoded in the current character set.
virtual bool is_valid(void) const;
// Copy the string into the wide character buffer #buf#.
virtual int ncopy(wchar_t * const buf, const int buflen) const;
friend class GBaseString;
protected:
// Return the next character and increment the source pointer.
virtual unsigned long getValidUCS4(const char *&source) const;
};
// Allocate a Native representation with room for #sz# characters.
// The null pointer argument only selects the concrete type to build.
GP<GStringRep>
GStringRep::Native::create(const unsigned int sz)
{
  GStringRep::Native * const type_selector = 0;
  return GStringRep::create(sz, type_selector);
}
// Create a Native representation holding a copy of the C string #s#.
GP<GStringRep>
GStringRep::Native::create(const char *s)
{
  // A temporary instance provides the Native flavour of strdup().
  GStringRep::Native prototype;
  return prototype.strdup(s);
}
// Create a Native representation by concatenating #s1# and #s2#.
GP<GStringRep>
GStringRep::Native::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
{
  GStringRep::Native prototype;
  return prototype.concat(s1, s2);
}
// Create a Native representation by concatenating the representation
// #s1# with the C string #s2#.
GP<GStringRep>
GStringRep::Native::create( const GP<GStringRep> &s1,const char *s2)
{
  GStringRep::Native prototype;
  return prototype.concat(s1, s2);
}
// Create a Native representation by concatenating the C string #s1#
// with the representation #s2#.
GP<GStringRep>
GStringRep::Native::create( const char *s1, const GP<GStringRep> &s2)
{
  GStringRep::Native prototype;
  return prototype.concat(s1, s2);
}
// Create a Native representation by concatenating two C strings.
GP<GStringRep>
GStringRep::Native::create(const char *s1,const char *s2)
{
  GStringRep::Native prototype;
  return prototype.concat(s1, s2);
}
// Create a Native representation from a portion of the C string #s#,
// selected by #start# and #length# as interpreted by substr().
GP<GStringRep>
GStringRep::Native::create(
  const char *s,const int start,const int length)
{
  GStringRep::Native prototype;
  return prototype.substr(s, start, length);
}
// Create a Native representation from an array of 16 bit units,
// delegating the conversion to substr().
GP<GStringRep>
GStringRep::Native::create(
  const unsigned short *s,const int start,const int length)
{
  GStringRep::Native prototype;
  return prototype.substr(s, start, length);
}
// Create a Native representation from an array of unsigned long
// character codes, delegating the conversion to substr().
GP<GStringRep>
GStringRep::Native::create(
  const unsigned long *s,const int start,const int length)
{
  GStringRep::Native prototype;
  return prototype.substr(s, start, length);
}
// Virtual factory: allocate a Native representation of #sz# characters.
GP<GStringRep>
GStringRep::Native::blank(const unsigned int sz) const
{
  GStringRep::Native * const type_selector = 0;
  return GStringRep::create(sz, type_selector);
}
// A Native representation always reports itself as natively encoded.
bool
GStringRep::Native::isNative(void) const
{
  const bool native_encoded = true;
  return native_encoded;
}
// Convert #rep# to the type of this object (Native, NOT_ESCAPED).
// A null #rep# is returned unchanged; the second argument is unused.
GP<GStringRep>
GStringRep::Native::toThis(
  const GP<GStringRep> &rep,const GP<GStringRep> &) const
{
  if(rep)
  {
    return rep->toNative(NOT_ESCAPED);
  }
  return rep;
}
// Create a Native representation by formatting #args# according to #fmt#.
// The format is first copied into a representation, which then performs
// the formatting; a null format representation is returned as is.
GP<GStringRep>
GStringRep::Native::create(const char fmt[],va_list &args)
{
  const GP<GStringRep> format_rep(create(fmt));
  if(format_rep)
  {
    return format_rep->vformat(args);
  }
  return format_rep;
}
int
GStringRep::Native::ncopy(
wchar_t * const buf, const int buflen ) const
{
return toUTF8()->ncopy(buf,buflen);
}
// Optionally switch the locale of category #xcategory# to #xlocale#.
// The member #locale# ends up holding the previous locale name only
// when the destructor has something to restore; otherwise it is emptied.
// Everything except storing the category is compiled out unless
// DO_CHANGELOCALE is set.
GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] )
: category(xcategory)
{
#if DO_CHANGELOCALE
// This is disabled under UNIX because
// it does not play nice with MT.
if(xlocale)
{
// Query (without changing) the current locale of this category.
locale=setlocale(xcategory,0);
if(locale.length() &&(locale!=xlocale))
{
// Request the new locale. If setlocale() reports the same name as
// before, nothing changed and there is nothing to restore.
if(locale == setlocale(category,xlocale))
{
locale.empty();
}
}
else
{
// Unknown current locale, or already the requested one.
locale.empty();
}
}
#endif
}
// Restore the locale saved by the constructor, if one was saved.
// Does nothing unless DO_CHANGELOCALE is set.
GStringRep::ChangeLocale::~ChangeLocale()
{
#if DO_CHANGELOCALE
// A non-empty #locale# is the name to give back to setlocale().
if(locale.length())
{
setlocale(category,(const char *)locale);
}
#endif
}
// Replace the contents of this string with the result of formatting
// the variable arguments according to #fmt#.
// Returns a reference to this string.
GNativeString &
GNativeString::format(const char fmt[], ... )
{
  va_list args;
  va_start(args, fmt);
  // Keep the result so that va_end() can run before returning:
  // every va_start() must be matched by a va_end() in the same function.
  const GP<GStringRep> formatted(GStringRep::Native::create(fmt,args));
  va_end(args);
  return init(formatted);
}
// Native implementations are gathered from here on. Not used in WinCE.

// Default constructor: nothing to set up.
GStringRep::Native::Native(void)
{
}

// Destructor: nothing to release here.
GStringRep::Native::~Native()
{
}
// Return the concatenation of this string and #s2#.
// A null #s2# yields this object itself.
// Throws "GStringRep.appendUTF8toNative" when #s2# is UTF8 encoded.
GP<GStringRep>
GStringRep::Native::append(const GP<GStringRep> &s2) const
{
  if(s2)
  {
    if(s2->isUTF8())
    {
      G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") );
    }
    return concat(data,s2->data);
  }
  return const_cast<GStringRep::Native *>(this);
}
// Create a Native representation by formatting the variable
// arguments according to #fmt#.
GP<GStringRep>
GStringRep::Native::create_format(const char fmt[],...)
{
  va_list args;
  va_start(args, fmt);
  // Keep the result so that va_end() can run before returning:
  // every va_start() must be matched by a va_end() in the same function.
  const GP<GStringRep> formatted(create(fmt,args));
  va_end(args);
  return formatted;
}
// Write the native encoding of the character #w0# at #ptr#
// and return the pointer produced by UCS4toNative().
unsigned char *
GStringRep::Native::UCS4toString(
  const unsigned long w0,unsigned char *ptr, mbstate_t *ps) const
{
  unsigned char * const next = UCS4toNative(w0, ptr, ps);
  return next;
}
// Convert a UCS4 to a multibyte string in the value bytes.
// The data pointed to by ptr should be long enough to contain
// the results with a nill termination. (Normally 7 characters
// is enough.)
// Returns a pointer to the terminating zero written after the output.
unsigned char *
GStringRep::UCS4toNative(
const unsigned long w0,unsigned char *ptr, mbstate_t *ps)
{
unsigned short w1;
unsigned short w2=1;
// With a 16 bit wchar_t the character is first split into UTF-16 units
// (w1, then w2) and the loop runs once per unit; otherwise it runs once.
for(int count=(sizeof(wchar_t)==sizeof(w1)) ? UCS4toUTF16(w0,w1,w2) : 1;
count;
--count,w1=w2)
{
// wchar_t can be either UCS4 or UCS2
const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
int i=wcrtomb((char *)ptr,w,ps);
// A negative result means the character could not be converted.
if(i<0)
{
break;
}
ptr[i]=0;
ptr += i;
}
ptr[0]=0;
return ptr;
}
// A Native string is already native: return this object.
// Throws "GStringRep.NativeToNative" when #escape# is UNKNOWN_ESCAPED.
GP<GStringRep>
GStringRep::Native::toNative(const EscapeMode escape) const
{
  const bool unknown_mode = (escape == UNKNOWN_ESCAPED);
  if(unknown_mode)
  {
    G_THROW( ERR_MSG("GStringRep.NativeToNative") );
  }
  GStringRep::Native * const self = const_cast<GStringRep::Native *>(this);
  return self;
}
// Convert this native multibyte string to a UTF8 representation.
// Each multibyte character is decoded with mbrtowc() and re-encoded with
// UCS4toUTF8(). The bool argument is unused.
GP<GStringRep>
GStringRep::Native::toUTF8(const bool) const
{
unsigned char *buf;
// Output buffer sized at six bytes per input byte, plus a terminator.
GPBuffer<unsigned char> gbuf(buf,size*6+1);
buf[0]=0;
if(data && size)
{
size_t n=size;
const char *source=data;
mbstate_t ps;
unsigned char *ptr=buf;
//(void)mbrlen(source, n, &ps);
memset(&ps,0,sizeof(mbstate_t));
int i=0;
if(sizeof(wchar_t) == sizeof(unsigned long))
{
// wchar_t is as wide as unsigned long: each decoded value is
// passed directly to UCS4toUTF8().
wchar_t w = 0;
for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i)
{
ptr=UCS4toUTF8(w,ptr);
}
}
else
{
// Otherwise decoded values are treated as UTF-16 units: when a single
// unit does not form a character, a second unit is read and the pair
// is combined.
wchar_t w = 0;
for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i)
{
unsigned short s[2];
s[0]=w;
unsigned long w0;
if(UTF16toUCS4(w0,s,s+1)<=0)
{
// Skip the first unit here; the loop increment skips the second.
source+=i;
n-=i;
if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0))
{
s[1]=w;
if(UTF16toUCS4(w0,s,s+2)<=0)
{
i=(-1);
break;
}
}
else
{
i=(-1);
break;
}
}
ptr=UCS4toUTF8(w0,ptr);
}
}
// A negative #i# signals a decoding failure: drop the buffer.
if(i<0)
{
gbuf.resize(0);
}
else
{
ptr[0]=0;
}
}
return GStringRep::UTF8::create((const char *)buf);
}
// Convert this string to a native string using toNative() with the
// given #escape# mode. An empty string yields an empty result.
// With DO_CHANGELOCALE and #currentlocale# false, a failed conversion is
// retried once after setlocale(LC_CTYPE,""), and the LC_CTYPE locale
// saved on entry is put back afterwards.
GNativeString
GBaseString::UTF8ToNative(
const bool currentlocale,const EscapeMode escape) const
{
const char *source=(*this);
GP<GStringRep> retval;
if(source && source[0])
{
#if DO_CHANGELOCALE
// Remember the LC_CTYPE locale in effect on entry.
GUTF8String lc_ctype(setlocale(LC_CTYPE,0));
#endif
bool repeat;
// #repeat# is true only during the first pass, and only when the
// caller allows trying another locale.
for(repeat=!currentlocale;;repeat=false)
{
retval=(*this)->toNative((GStringRep::EscapeMode)escape);
#if DO_CHANGELOCALE
// Stop on the second pass, on success, or when switching to the
// environment locale reports the same name as before.
if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
#endif
break;
}
#if DO_CHANGELOCALE
if(!repeat)
{
setlocale(LC_CTYPE,(const char *)lc_ctype);
}
#endif
}
return GNativeString(retval);
}
/*MBCS*/
// Convert this string to native encoding with UTF8ToNative(). When the
// conversion yields an empty string, the original bytes are used instead.
// An empty string yields an empty result.
GNativeString
GBaseString::getUTF82Native( const EscapeMode escape ) const
{ //MBCS cvt
  GNativeString converted;
  // We don't want to convert this if it
  // already is known to be native...
  // if (isNative()) return *this;
  const size_t length_with_nul=length()+1;
  if(length_with_nul<=1)
  {
    return converted;
  }
  converted=UTF8ToNative(false,escape) ;
  if(!converted.length())
  {
    converted=(const char*)*this;
  }
  return converted;
}
// Convert this string, taken as native multibyte text, to UTF8.
// The conversion is accepted only when converting the result back to
// native reproduces the original bytes; otherwise the result is reset.
// With DO_CHANGELOCALE a failed conversion is retried once after
// setlocale(LC_CTYPE,""), and the saved LC_CTYPE locale is put back.
GUTF8String
GBaseString::NativeToUTF8(void) const
{
GP<GStringRep> retval;
if(length())
{
const char *source=(*this);
#if DO_CHANGELOCALE
// Remember the LC_CTYPE locale in effect on entry.
GUTF8String lc_ctype=setlocale(LC_CTYPE,0);
#endif
bool repeat;
// #repeat# is true only during the first pass.
for(repeat=true;;repeat=false)
{
if( (retval=GStringRep::NativeToUTF8(source)) )
{
// Round trip check: a non-zero comparison means a mismatch.
if(GStringRep::cmp(retval->toNative(),source))
{
retval=GStringRep::UTF8::create((unsigned int)0);
}
}
#if DO_CHANGELOCALE
// Stop on the second pass, on success, or when switching to the
// environment locale reports the same name as before.
if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
#endif
break;
}
#if DO_CHANGELOCALE
if(!repeat)
{
setlocale(LC_CTYPE,(const char *)lc_ctype);
}
#endif
}
return GUTF8String(retval);
}
// Convert this string to UTF8 with NativeToUTF8(). When the conversion
// yields an empty string, the original bytes are used instead.
// An empty string yields an empty result.
GUTF8String
GBaseString::getNative2UTF8(void) const
{ //MBCS cvt
  // We don't want to do a transform this
  // if we already are in the given type.
  // if (isUTF8()) return *this;
  const size_t length_with_nul=length()+1;
  GUTF8String converted;
  if(length_with_nul <= 1)
  {
    return converted;
  }
  converted=NativeToUTF8();
  if(!converted.length())
  {
    converted=(const char *)(*this);
  }
  return converted;
} /*MBCS*/
int
GStringRep::Native::cmp(const GP<GStringRep> &s2,const int len) const
{
int retval;
if(s2)
{
if(s2->isUTF8())
{
const GP<GStringRep> r(toUTF8(true));
if(r)
{
retval=GStringRep::cmp(r->data,s2->data,len);
}else
{
retval=cmp(s2->toNative(NOT_ESCAPED),len);
}
}else
{
retval=GStringRep::cmp(data,s2->data,len);
}
}else
{
retval=GStringRep::cmp(data,0,len);
}
return retval;
}
// Parse the string as a decimal integer using atoi().
int
GStringRep::Native::toInt() const
{
  const int value = atoi(data);
  return value;
}
long
GStringRep::Native::toLong(
const int pos, int &endpos, const int base) const
{
char *edata=0;
const long retval=strtol(data+pos, &edata, base);
if(edata)
{
endpos=(int)((size_t)edata-(size_t)data);
}else
{
endpos=(-1);
}
return retval;
}
unsigned long
GStringRep::Native::toULong(
const int pos, int &endpos, const int base) const
{
char *edata=0;
const unsigned long retval=strtoul(data+pos, &edata, base);
if(edata)
{
endpos=(int)((size_t)edata-(size_t)data);
}else
{
endpos=(-1);
}
return retval;
}
double
GStringRep::Native::toDouble(
const int pos, int &endpos) const
{
char *edata=0;
const double retval=strtod(data+pos, &edata);
if(edata)
{
endpos=(int)((size_t)edata-(size_t)data);
}else
{
endpos=(-1);
}
return retval;
}
// Decode the multibyte character at #source# and advance #source# past it.
// Returns the character code, or 0 when nothing could be decoded.
// Nothing is read when #source# is null or not before the end of the data.
// On a decoding error #source# is advanced by a single byte.
unsigned long
GStringRep::Native::getValidUCS4(const char *&source) const
{
  unsigned long retval=0;
  // Number of bytes between #source# and the end of the string data.
  int n=(int)((size_t)size+(size_t)data-(size_t)source);
  if(source && (n > 0))
  {
    mbstate_t ps;
    //(void)mbrlen(source, n, &ps);
    memset(&ps,0,sizeof(mbstate_t));
    wchar_t wt;
    const int len=mbrtowc(&wt,source,n,&ps);
    if(len>=0)
    {
      if(sizeof(wchar_t) == sizeof(unsigned short))
      {
        // 16 bit wchar_t: the value is a UTF-16 unit. When one unit is
        // not a complete character, a second unit is read and combined.
        source+=len;
        unsigned short s[2];
        s[0]=(unsigned short)wt;
        if(UTF16toUCS4(retval,s,s+1)<=0)
        {
          if((n-=len)>0)
          {
            const int len2=mbrtowc(&wt,source,n,&ps);
            if(len2>=0)
            {
              s[1]=(unsigned short)wt;
              unsigned long w;
              if(UTF16toUCS4(w,s,s+2)>0)
              {
                source+=len2;
                retval=w;
              }
            }
          }
        }
      }else
      {
        retval=(unsigned long)wt;
        // Skip every byte consumed by mbrtowc(), not just the first one,
        // so the next call starts on a character boundary. A zero length
        // (decoded null character) still advances by one byte.
        source+=(len>0)?len:1;
      }
    }else
    {
      source++;
    }
  }
  return retval;
}
// Tests if a string is legally encoded in the current character set.
// The string is walked with mbrlen(); a reported length larger than the
// remaining byte count marks the string as invalid, and a zero length
// stops the scan.
bool
GStringRep::Native::is_valid(void) const
{
  if(!(data && size))
  {
    return true;
  }
  size_t remaining=size;
  const char *cursor=data;
  mbstate_t state;
  //(void)mbrlen(cursor, remaining, &state);
  memset(&state,0,sizeof(mbstate_t));
  while(remaining)
  {
    const size_t consumed=mbrlen(cursor,remaining,&state);
    if(consumed > remaining)
    {
      return false;
    }
    if(!consumed)
    {
      break;
    }
    cursor+=consumed;
    remaining-=consumed;
  }
  return true;
}
// These are dummy functions: the setters ignore their arguments
// and the getter returns a null pointer.
void
GStringRep::set_remainder(void const * const, const unsigned int,
  const EncodeType)
{
}

void
GStringRep::set_remainder(void const * const, const unsigned int,
  const GP<GStringRep> &encoding)
{
}

void
GStringRep::set_remainder( const GP<GStringRep::Unicode> &)
{
}

GP<GStringRep::Unicode>
GStringRep::get_remainder( void ) const
{
  return 0;
}
// Construct a one character string from #dat#.
GNativeString::GNativeString(const char dat)
{
  const GP<GStringRep> rep(GStringRep::Native::create(&dat,0,1));
  init(rep);
}
// Construct from a copy of the C string #str#.
GNativeString::GNativeString(const char *str)
{
  const GP<GStringRep> rep(GStringRep::Native::create(str));
  init(rep);
}
// Construct from a copy of the unsigned C string #str#.
GNativeString::GNativeString(const unsigned char *str)
{
  const char * const text = (const char *)str;
  init(GStringRep::Native::create(text));
}
// Construct from a zero terminated array of 16 bit units.
GNativeString::GNativeString(const unsigned short *str)
{
  const GP<GStringRep> rep(GStringRep::Native::create(str,0,-1));
  init(rep);
}
// Construct from a zero terminated array of unsigned long codes.
GNativeString::GNativeString(const unsigned long *str)
{
  const GP<GStringRep> rep(GStringRep::Native::create(str,0,-1));
  init(rep);
}
// Construct from at most #len# characters of #dat#.
// A #len# that is negative when seen as an int means "no limit" (-1).
GNativeString::GNativeString(const char *dat, unsigned int len)
{
  const int count = (int)len;
  init(GStringRep::Native::create(dat,0,(count<0)?(-1):count));
}
// Construct from #len# 16 bit units of #dat#.
// A #len# that is negative when seen as an int means "no limit" (-1).
GNativeString::GNativeString(const unsigned short *dat, unsigned int len)
{
  const int count = (int)len;
  init(GStringRep::Native::create(dat,0,(count<0)?(-1):count));
}
// Construct from #len# unsigned long codes of #dat#.
// A #len# that is negative when seen as an int means "no limit" (-1).
GNativeString::GNativeString(const unsigned long *dat, unsigned int len)
{
  const int count = (int)len;
  init(GStringRep::Native::create(dat,0,(count<0)?(-1):count));
}
// Copy constructor: initialize from the other native string.
GNativeString::GNativeString(const GNativeString &str)
{
  init( str );
}
// Construct from the portion of #gs# selected by #from# and #len#.
// A negative #len# is passed on as -1.
GNativeString::GNativeString(const GBaseString &gs, int from, int len)
{
  const int count = (int)len;
  init(GStringRep::Native::create(gs,from,(count<0)?(-1):count));
}
// Construct the "%d" rendering of #number#.
GNativeString::GNativeString(const int number)
{
  const GP<GStringRep> text(GStringRep::Native::create_format("%d",number));
  init(text);
}
// Construct the "%f" rendering of #number#.
GNativeString::GNativeString(const double number)
{
  const GP<GStringRep> text(GStringRep::Native::create_format("%f",number));
  init(text);
}
// Assign a single character.
GNativeString&
GNativeString::operator= (const char str)
{
  const GP<GStringRep> rep(GStringRep::Native::create(&str,0,1));
  return init(rep);
}
// Assign a copy of the C string #str#.
GNativeString&
GNativeString::operator= (const char *str)
{
  const GP<GStringRep> rep(GStringRep::Native::create(str));
  return init(rep);
}
// Concatenate this string with the native string #s2#,
// producing a native string.
GNativeString
GBaseString::operator+(const GNativeString &s2) const
{
  const GP<GStringRep> joined(GStringRep::Native::create(*this,s2));
  return GNativeString(joined);
}
// Convert the native C string #s# to a UTF8 representation.
// Returns a null pointer when #s# is null or empty, since no Native
// representation is created for such input.
GP<GStringRep>
GStringRep::NativeToUTF8( const char *s )
{
  const GP<GStringRep> native(GStringRep::Native::create(s));
  // Guard against dereferencing the null pointer returned for empty input.
  if(native)
  {
    return native->toUTF8();
  }
  return native;
}
#endif // HAS_WCHAR
// Generic factory: build a representation of concrete type TYPE with
// storage for #sz# characters plus a terminating zero.
// The pointer argument only selects TYPE. A null pointer is returned
// when #sz# is zero. The character storage is left uninitialized apart
// from the terminator.
template <class TYPE>
GP<GStringRep>
GStringRep::create(const unsigned int sz, TYPE *)
{
  GP<GStringRep> holder;
  if (sz == 0)
  {
    return holder;
  }
  GStringRep * const rep = new TYPE;
  holder = rep;
  rep->data = (char *)(::operator new(sz+1));
  rep->size = sz;
  rep->data[sz] = 0;
  return holder;
}
// Return a new representation (same type as this one) holding a copy of
// the C string #s#. A null or empty #s# yields a null pointer.
GP<GStringRep>
GStringRep::strdup(const char *s) const
{
  GP<GStringRep> copy;
  const int count = s ? strlen(s) : 0;
  if(count > 0)
  {
    copy = blank(count);
    char *out = copy->data;
    const char * const stop = s + count;
    while(*s && (s != stop))
    {
      *out++ = *s++;
    }
    *out = 0;
  }
  return copy;
}
// Return a new representation holding a portion of the C string #s#.
// A non-negative #start# is an offset from the beginning, a negative one
// is counted from the end. A non-negative #len# is a character count, a
// negative one positions the end relative to the end of the string
// (-1 meaning "up to the end"). A null or empty #s#, or an empty
// selection, yields a null pointer.
GP<GStringRep>
GStringRep::substr(const char *s,const int start,const int len) const
{
GP<GStringRep> retval;
if(s && s[0])
{
// The real length is only computed when a negative argument needs it;
// otherwise #length# is a sentinel that is never used below.
const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1);
const char *startptr, *endptr;
if(start<0)
{
// Count back from the end, clamping at the beginning.
startptr=s+length+start;
if(startptr<s)
startptr=s;
}else
{
// Walk forward so that the start never passes the terminator.
startptr=s;
for(const char * const ptr=s+start;(startptr<ptr)&&*startptr;++startptr)
EMPTY_LOOP;
}
if(len<0)
{
if(s+length+1 < startptr+len)
{
endptr=startptr;
}else
{
endptr=s+length+1+len;
}
}else
{
// Walk forward so that the end never passes the terminator.
endptr=startptr;
for(const char * const ptr=startptr+len;(endptr<ptr)&&*endptr;++endptr)
EMPTY_LOOP;
}
if(endptr>startptr)
{
retval=blank((size_t)(endptr-startptr));
char *data=retval->data;
for(; (startptr<endptr) && *startptr; ++startptr,++data)
{
data[0]=startptr[0];
}
data[0]=0;
}
}
return retval;
}
// Return a new representation built from an array of 16 bit units.
// The units are decoded with UTF16toUCS4() and each character is written
// with UCS4toString(). A negative #len# means "up to the terminating
// zero". A null or empty #s# yields a null pointer.
GP<GStringRep>
GStringRep::substr(const unsigned short *s,const int start,const int len) const
{
GP<GStringRep> retval;
if(s && s[0])
{
unsigned short const *eptr;
if(len<0)
{
// Locate the terminating zero.
for(eptr=s;eptr[0];++eptr)
EMPTY_LOOP;
}else
{
// NOTE(review): the end is computed from the original #s#, before
// #start# is applied below - confirm this is intended.
eptr=&(s[len]);
}
s=&s[start];
if((size_t)s<(size_t)eptr)
{
mbstate_t ps;
memset(&ps,0,sizeof(mbstate_t));
unsigned char *buf,*ptr;
// Three output bytes are reserved per input unit, plus slack.
GPBuffer<unsigned char> gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7);
for(ptr=buf;s[0];)
{
unsigned long w;
int i=UTF16toUCS4(w,s,eptr);
// Stop when no further character can be decoded.
if(i<=0)
break;
s+=i;
ptr=UCS4toString(w,ptr,&ps);
}
ptr[0]=0;
retval = strdup( (const char *)buf );
}
}
return retval;
}
// Return a new representation built from an array of unsigned long
// character codes, each written with UCS4toString().
// A negative #len# means "up to the terminating zero"; otherwise the end
// of the input is #s# + #len# (computed before #start# is applied).
// A null or empty #s#, or an empty selection, yields a null pointer.
GP<GStringRep>
GStringRep::substr(const unsigned long *s,const int start,const int len) const
{
  GP<GStringRep> retval;
  if(s && s[0])
  {
    unsigned long const *eptr;
    if(len<0)
    {
      // Locate the terminating zero.
      for(eptr=s;eptr[0];++eptr)
        EMPTY_LOOP;
    }else
    {
      eptr=&(s[len]);
    }
    s=&s[start];
    if((size_t)s<(size_t)eptr)
    {
      mbstate_t ps;
      memset(&ps,0,sizeof(mbstate_t));
      unsigned char *buf,*ptr;
      GPBuffer<unsigned char> gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7);
      // Stop at #eptr# as well as at a zero code: the buffer above is
      // sized from #eptr#, so reading past it would overflow the buffer
      // and ignore the requested length.
      for(ptr=buf;(s<eptr)&&s[0];++s)
      {
        ptr=UCS4toString(s[0],ptr,&ps);
      }
      ptr[0]=0;
      retval = strdup( (const char *)buf );
    }
  }
  return retval;
}
// Return the concatenation of this string and the C string #s2#.
// A null #s2# yields this object itself.
GP<GStringRep>
GStringRep::append(const char *s2) const
{
  if(!s2)
  {
    return const_cast<GStringRep *>(this);
  }
  return concat(data,s2);
}
// Return the concatenation of this string and #s2#.
// A null #s2# yields this object itself.
// Throws "GStringRep.appendNativeToUTF8" when #s2# is natively encoded.
GP<GStringRep>
GStringRep::UTF8::append(const GP<GStringRep> &s2) const
{
  if(s2)
  {
    if(s2->isNative())
    {
      G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") );
    }
    return concat(data,s2->data);
  }
  return const_cast<GStringRep::UTF8 *>(this);
}
// Return a new representation (same type as this one) holding #s1#
// followed by #s2#. Null arguments count as empty strings; when both
// are empty a null pointer is returned.
GP<GStringRep>
GStringRep::concat(const char *s1,const char *s2) const
{
  const int head_len=(s1?strlen(s1):0);
  const int tail_len=(s2?strlen(s2):0);
  const int total_len=head_len+tail_len;
  GP<GStringRep> joined;
  if(total_len>0)
  {
    joined=blank(total_len);
    char * const dest=joined->data;
    if(!head_len)
    {
      // Only the second string contributes.
      strcpy(dest,s2);
    }else
    {
      strcpy(dest,s1);
      if(tail_len)
        strcat(dest,s2);
    }
  }
  return joined;
}
// Definition of the static member nullstr: an empty C string.
const char *GBaseString::nullstr = "";
// Reset this string by re-initializing it with a null value.
void
GBaseString::empty( void )
{
init(0);
}
// Return a new representation of #n# characters initialized with the
// beginning of this string. A negative #n# means the current length of
// the data; a resulting size of zero yields a null pointer.
GP<GStringRep>
GStringRep::getbuf(int n) const
{
  const int count = (n < 0) ? (int)strlen(data) : n;
  GP<GStringRep> buffer;
  if(count > 0)
  {
    buffer = blank(count);
    char * const dest = buffer->data;
    strncpy(dest, data, count);
    dest[count] = 0;
  }
  return buffer;
}
// Test the character at #ptr# with the predicate #xiswtest#.
// Returns a pointer past the character when the test succeeds (or fails,
// if #reverse# is set), and #ptr# itself otherwise. Characters that do
// not fit a 16 bit wchar_t are always skipped.
const char *
GStringRep::isCharType(
  bool (*xiswtest)(const unsigned long wc), const char *ptr, const bool reverse) const
{
  const char *next = ptr;
  const unsigned long code = getValidUCS4(next);
  if(next == ptr)
  {
    // Nothing was consumed.
    return ptr;
  }
  if((sizeof(wchar_t) == 2) && (code & ~0xffff))
  {
    return next;
  }
  const bool matched = xiswtest(code);
  return (matched != reverse) ? next : ptr;
}
// Starting at offset #from#, skip the characters for which isCharType()
// (called with the inverse of #reverse#) advances, examining at most
// #len# bytes (everything when #len# is negative). Returns the offset
// reached, or the string size when #from# is not below it.
int
GStringRep::nextCharType(
  bool (*xiswtest)(const unsigned long wc), const int from, const int len,
  const bool reverse) const
{
  // We want to return the position of the next
  // non white space starting from the #from#
  // location. isspace should work in any locale
  // so we should only need to do this for the non-
  // native locales (UTF8)
  if(from >= size)
  {
    return size;
  }
  const char *cursor = data+from;
  const char * const limit = cursor+((len<0)?(size-from):len);
  while((cursor<limit) && *cursor)
  {
    // Skip characters that fail the isCharType test
    const char * const advanced = isCharType(xiswtest,cursor,!reverse);
    if(advanced == cursor)
    {
      break;
    }
    cursor = advanced;
  }
  return (int)((size_t)cursor-(size_t)data);
}
// Test whether the character code #w# is white space.
// '\r' and '\n' always count as white space. Codes that cannot be
// represented (above 0xffff with a 16 bit wchar_t, or above 0xff without
// wide character classification) are reported as matching.
bool
GStringRep::giswspace(const unsigned long w)
{
#if HAS_WCTYPE
  return
    ((sizeof(wchar_t) == 2)&&(w&~0xffff))
    ||((unsigned long)iswspace((wchar_t)w))
    ||((w == '\r')||(w == '\n'));
#else
  // <ctype.h> functions require a value representable as unsigned char;
  // a plain (possibly signed) char would be negative for 0x80..0xff.
  return
    (w&~0xff)?(true):(((unsigned long)isspace((unsigned char)w))||((w == '\r')||(w == '\n')));
#endif
}
// Test whether the character code #w# is upper case.
// Codes that cannot be represented (above 0xffff with a 16 bit wchar_t,
// or above 0xff without wide character classification) are reported as
// matching.
bool
GStringRep::giswupper(const unsigned long w)
{
#if HAS_WCTYPE
  return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
    ?(true):((unsigned long)iswupper((wchar_t)w)?true:false);
#else
  // <ctype.h> functions require a value representable as unsigned char;
  // a plain (possibly signed) char would be negative for 0x80..0xff.
  return (w&~0xff)?(true):((unsigned long)isupper((unsigned char)w)?true:false);
#endif
}
// Test whether the character code #w# is lower case.
// Codes that cannot be represented (above 0xffff with a 16 bit wchar_t,
// or above 0xff without wide character classification) are reported as
// matching.
bool
GStringRep::giswlower(const unsigned long w)
{
#if HAS_WCTYPE
  return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
    ?(true):((unsigned long)iswlower((wchar_t)w)?true:false);
#else
  // <ctype.h> functions require a value representable as unsigned char;
  // a plain (possibly signed) char would be negative for 0x80..0xff.
  return (w&~0xff)?(true):((unsigned long)islower((unsigned char)w)?true:false);
#endif
}
// Return the upper case equivalent of the character code #w#.
// Codes that cannot be represented (above 0xffff with a 16 bit wchar_t,
// or above 0xff without wide character support) are returned unchanged.
unsigned long
GStringRep::gtowupper(const unsigned long w)
{
#if HAS_WCTYPE
  return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
    ?w:((unsigned long)towupper((wchar_t)w));
#else
  // <ctype.h> functions require a value representable as unsigned char;
  // a plain (possibly signed) char would be negative for 0x80..0xff.
  return (w&~0xff)?w:((unsigned long)toupper((unsigned char)w));
#endif
}
// Return the lower case equivalent of the character code #w#.
// Codes that cannot be represented (above 0xffff with a 16 bit wchar_t,
// or above 0xff without wide character support) are returned unchanged.
unsigned long
GStringRep::gtowlower(const unsigned long w)
{
#if HAS_WCTYPE
  return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
    ?w:((unsigned long)towlower((wchar_t)w));
#else
  // <ctype.h> functions require a value representable as unsigned char;
  // a plain (possibly signed) char would be negative for 0x80..0xff.
  return (w&~0xff)?w:((unsigned long)tolower((unsigned char)w));
#endif
}
// Return a version of this string in which every character failing the
// predicate #xiswcase# has been mapped through #xtowcase#.
// When all characters already satisfy the predicate, this object itself
// is returned and nothing is copied.
GP<GStringRep>
GStringRep::tocase(
bool (*xiswcase)(const unsigned long wc),
unsigned long (*xtowcase)(const unsigned long wc)) const
{
GP<GStringRep> retval;
char const * const eptr=data+size;
char const *ptr=data;
// First pass: find the first character that needs converting.
while(ptr<eptr)
{
char const * const xptr=isCharType(xiswcase,ptr,false);
if(ptr == xptr)
break;
ptr=xptr;
}
if(ptr<eptr)
{
// #n# bytes at the front need no conversion and are copied verbatim.
const int n=(int)((size_t)ptr-(size_t)data);
unsigned char *buf;
// Six output bytes are reserved for each remaining input byte.
GPBuffer<unsigned char> gbuf(buf,n+(1+size-n)*6);
if(n>0)
{
strncpy((char *)buf,data,n);
}
unsigned char *buf_ptr=buf+n;
for(char const *ptr=data+n;ptr<eptr;)
{
char const * const xptr=ptr;
const unsigned long w=getValidUCS4(ptr);
// Stop if no progress was made.
if(ptr == xptr)
break;
if(xiswcase(w))
{
// Already in the wanted case: copy the original bytes.
const int len=(int)((size_t)ptr-(size_t)xptr);
strncpy((char *)buf_ptr,xptr,len);
buf_ptr+=len;
}else
{
// Convert the character and re-encode it.
mbstate_t ps;
memset(&ps,0,sizeof(mbstate_t));
buf_ptr=UCS4toString(xtowcase(w),buf_ptr,&ps);
}
}
buf_ptr[0]=0;
retval=substr((const char *)buf,0,(int)((size_t)buf_ptr-(size_t)buf));
}else
{
retval=const_cast<GStringRep *>(this);
}
return retval;
}
// Returns a copy of this string with characters used in XML escaped as follows:
// '<' --> "&lt;"
// '>' --> "&gt;"
// '&' --> "&amp;"
// '\'' --> "&apos;"
// '\"' --> "&quot;"
// Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f.
// When #tosevenbit# is set, every character from 0x7e upwards is
// written as a numeric reference "&#N;".
// When nothing needs escaping, this object itself is returned.
GP<GStringRep>
GStringRep::toEscaped( const bool tosevenbit ) const
{
bool modified=false;
char *ret;
// Output buffer sized at seven bytes per input byte.
GPBuffer<char> gret(ret,size*7);
ret[0]=0;
char *retptr=ret;
// #start# marks the beginning of the pending unescaped run, #s# is the
// read position, and #last# is the position before the character just read.
char const *start=data;
char const *s=start;
char const *last=s;
GP<GStringRep> special;
for(unsigned long w;(w=getValidUCS4(s));last=s)
{
// #ss# is the replacement text, or null when none is needed.
char const *ss=0;
switch(w)
{
case '<':
ss="&lt;";
break;
case '>':
ss="&gt;";
break;
case '&':
ss="&amp;";
break;
case '\47':
ss="&apos;";
break;
case '\42':
ss="&quot;";
break;
default:
if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80))))
{
// Numeric character reference, converted to this string's type.
special=toThis(UTF8::create_format("&#%lu;",w));
ss=special->data;
}
break;
}
if(ss)
{
modified=true;
if(s!=start)
{
// Flush the unescaped run preceding the current character.
size_t len=(size_t)last-(size_t)start;
strncpy(retptr,start,len);
retptr+=len;
start=s;
}
if(ss[0])
{
size_t len=strlen(ss);
strcpy(retptr,ss);
retptr+=len;
}
}
}
GP<GStringRep> retval;
if(modified)
{
// Append whatever follows the last escaped character.
strcpy(retptr,start);
retval=strdup( ret );
}else
{
retval=const_cast<GStringRep *>(this);
}
// DEBUG_MSG( "Escaped string is '" << ret << "'\n" );
return retval;
}
// Return the table of the five basic entity names (lt, gt, amp, apos,
// quot) and the characters they stand for. The table is filled the
// first time it is found empty.
static const GMap<GUTF8String,GUTF8String> &
BasicMap( void )
{
  static GMap<GUTF8String,GUTF8String> entities;
  if (entities.size() == 0)
  {
    entities["lt"] = GUTF8String('<');
    entities["gt"] = GUTF8String('>');
    entities["amp"] = GUTF8String('&');
    entities["apos"] = GUTF8String('\47');
    entities["quot"] = GUTF8String('\42');
  }
  return entities;
}
// Return a copy of this string in which "&name;" and "&#number;"
// sequences have been replaced. Numeric references (decimal, or
// hexadecimal after "#x"/"#X") become the UTF8 encoding of the value.
// Names are looked up in #ConvMap# first and then in the basic table
// from BasicMap(). Unknown names and empty "&;" are copied unchanged.
// When nothing changed, this string itself is returned.
GUTF8String
GUTF8String::fromEscaped( const GMap<GUTF8String,GUTF8String> ConvMap ) const
{
GUTF8String ret; // Build output string here
int start_locn = 0; // Beginning of substring to skip
int amp_locn; // Location of a found ampersand
while( (amp_locn = search( '&', start_locn )) > -1 )
{
// Found the next ampersand
// Locate the closing semicolon
const int semi_locn = search( ';', amp_locn );
// No closing semicolon, exit and copy
// the rest of the string.
if( semi_locn < 0 )
break;
// Copy the text preceding the ampersand.
ret += substr( start_locn, amp_locn - start_locn );
// Number of characters between '&' and ';'.
int const len = semi_locn - amp_locn - 1;
if(len)
{
GUTF8String key = substr( amp_locn+1, len);
//DEBUG_MSG( "key = '" << key << "'\n" );
char const * s=key;
if( s[0] == '#')
{
unsigned long value;
char *ptr=0;
if(s[1] == 'x' || s[1] == 'X')
{
value=strtoul((char const *)(s+2),&ptr,16);
}else
{
value=strtoul((char const *)(s+1),&ptr,10);
}
// NOTE(review): strtoul() stores an end pointer even when no digits
// are parsed, so the else branch below looks unreachable - confirm.
if(ptr)
{
unsigned char utf8char[7];
unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char);
ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char);
}else
{
ret += substr( amp_locn, semi_locn - amp_locn + 1 );
}
}else
{
GPosition map_entry = ConvMap.contains( key );
if( map_entry )
{ // Found in the conversion map, substitute
ret += ConvMap[map_entry];
} else
{
// Fall back to the basic entity table.
static const GMap<GUTF8String,GUTF8String> &Basic = BasicMap();
GPosition map_entry = Basic.contains( key );
if ( map_entry )
{
ret += Basic[map_entry];
}else
{
// Unknown name: keep the sequence as written.
ret += substr( amp_locn, len+2 );
}
}
}
}else
{
// Empty name ("&;"): keep the sequence as written.
ret += substr( amp_locn, len+2 );
}
start_locn = semi_locn + 1;
// DEBUG_MSG( "ret = '" << ret << "'\n" );
}
// Copy the end of the string to the output
ret += substr( start_locn, length()-start_locn );
// DEBUG_MSG( "Unescaped string is '" << ret << "'\n" );
return (ret == *this)?(*this):ret;
}
// Unescape using an empty caller-supplied conversion map.
GUTF8String
GUTF8String::fromEscaped(void) const
{
  const GMap<GUTF8String,GUTF8String> no_custom_entities;
  return fromEscaped(no_custom_entities);
}
// Returns a representation equal to this one with the character at
// index n replaced by ch.
//
// A negative n counts from the end.  n may equal size, which appends ch.
// Writing a zero character truncates the string at n.  When the character
// is already ch, this representation itself is returned.
// Calls GBaseString::throw_illegal_subscript() when n is out of range.
GP<GStringRep>
GStringRep::setat(int n, char ch) const
{
  GP<GStringRep> retval;
  if(n<0)
    n+=size;
  if (n < 0 || n>size)
    GBaseString::throw_illegal_subscript();
  if(ch == data[n])
  {
    // Nothing to change.
    retval=const_cast<GStringRep *>(this);
  }else if(!ch)
  {
    // Storing a terminator: keep only the first n characters.
    retval=getbuf(n);
  }else
  {
    // Appending (n == size) needs room for one more character.  The
    // original asked getbuf() for only `size` characters and then wrote
    // data[size+1], one element past what it had requested.
    // NOTE(review): assumes getbuf(k) yields a copy holding k characters
    // plus a terminator, as its use for truncation above suggests.
    retval=getbuf((n<size)?size:(n+1));
    retval->data[n]=ch;
    if(n == size)
      retval->data[n+1]=0;
  }
  return retval;
}
// Select the bounded formatting routine used by GStringRep::vformat().
// USE_VSNPRINTF stays undefined when no such routine is known; vformat()
// then falls back to vsprintf() with an overwrite check.
#ifdef WIN32
#define USE_VSNPRINTF _vsnprintf
#endif
// With autoconf, trust the configure result; otherwise only assume
// vsnprintf() exists when the `linux` macro is defined.
#ifdef AUTOCONF
# ifdef HAVE_VSNPRINTF
# define USE_VSNPRINTF vsnprintf
# endif
#else
# ifdef linux
# define USE_VSNPRINTF vsnprintf
# endif
#endif
// Replaces the contents of this string with the result of formatting the
// variable arguments according to fmt, and returns *this.
// NOTE(review): there is no va_end() here; presumably the argument list is
// released downstream (GStringRep::vformat() calls va_end) -- confirm.
GUTF8String &
GUTF8String::format(const char fmt[], ... )
{
  va_list arglist;
  va_start(arglist, fmt);
  return init(GStringRep::UTF8::create(fmt,arglist));
}
// Builds a new representation by formatting the variable arguments
// according to fmt.
// NOTE(review): there is no va_end() here; presumably the argument list is
// released downstream (GStringRep::vformat() calls va_end) -- confirm.
GP<GStringRep>
GStringRep::UTF8::create_format(const char fmt[],...)
{
  va_list arglist;
  va_start(arglist, fmt);
  return create(fmt,arglist);
}
// Uses this string as a format and returns the formatted result.
// An empty format yields a null pointer.
//
// Positional directives written as "%N!spec!" are supported:
//  - WIN32: the whole string is handed to FormatMessage().
//  - elsewhere: each "%N!spec!" is rewritten to "%N$spec" and the
//    rewritten copy is passed to the printf family.
// Positional handling is abandoned as soon as a directive that is not of
// the "%N!" form is met; the string is then used unchanged as an ordinary
// printf format.
//
// The LC_NUMERIC locale is forced to "C" while formatting unless
// isNative() is true.  va_end(args) is called by this function.
GP<GStringRep>
GStringRep::vformat(va_list args) const
{
  GP<GStringRep> retval;
  if(size)
  {
#ifndef WIN32
    // nfmt receives the "%N$spec" translation.  It never needs more room
    // than the original: each "%N!spec!" shrinks by one character.
    char *nfmt;
    GPBuffer<char> gnfmt(nfmt,size+1);
    nfmt[0]=0;
    int start=0;                    // First character not yet copied to nfmt
#endif
    int from=0;
    while((from=search('%',from)) >= 0)
    {
      if(data[++from] != '%')
      {
        int m,n=0;
        // n becomes non-zero only when "<number>!" follows the '%'.
        sscanf(data+from,"%d!%n",&m,&n);
        if(n)
        {
#ifdef WIN32
          char *lpszFormat=data;
          LPTSTR lpszTemp;
          if((!::FormatMessage(
            FORMAT_MESSAGE_FROM_STRING|FORMAT_MESSAGE_ALLOCATE_BUFFER,
            lpszFormat, 0, 0, (LPTSTR)&lpszTemp,0,&args))
            || !lpszTemp)
          {
            G_THROW(GException::outofmemory);
          }
          va_end(args);
          retval=strdup((const char *)lpszTemp);
          LocalFree(lpszTemp);
          break;
#else
          from+=n;                  // Now just past the first '!'
          const int end=search('!',from);
          if(end>=0)
          {
            // Copy the pending text up to and including "%N" (but not the
            // first '!'), add '$', then the conversion spec found between
            // the two '!'.  The original copied the first '!' as well and
            // appended '$' after the spec, giving "%N!spec$".
            strncat(nfmt,data+start,(int)(from-1-start));
            strncat(nfmt,"$",1);
            strncat(nfmt,data+from,(int)(end-from));
            start=from=end+1;
          }else
          {
            gnfmt.resize(0);
            from=(-1);
            break;
          }
#endif
        }else
        {
#ifndef WIN32
          gnfmt.resize(0);
#endif
          from=(-1);
          break;
        }
      }else
      {
        // "%%" is a literal percent sign: step over its second character
        // so that it is not mistaken for the start of a directive.
        ++from;
      }
    }
    if(from < 0)
    {
#ifndef WIN32
      // Complete the translation with the text following the last
      // directive; the original left this tail out.
      if(nfmt && nfmt[0])
        strcat(nfmt,data+start);
      char const * const fmt=(nfmt&&nfmt[0])?nfmt:data;
#else
      char const * const fmt=data;
#endif
      int buflen=32768;
      char *buffer;
      GPBuffer<char> gbuffer(buffer,buflen);
      ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C"));
      // Format string
#ifdef USE_VSNPRINTF
      for(;;)
      {
#ifdef va_copy
        // Formatting consumes the argument list, so every attempt works
        // on its own copy.
        va_list attempt;
        va_copy(attempt,args);
        const int needed=USE_VSNPRINTF(buffer, buflen, fmt, attempt);
        va_end(attempt);
#else
        const int needed=USE_VSNPRINTF(buffer, buflen, fmt, args);
#endif
        // Success means the output and its terminator both fit.
        if(needed >= 0 && needed < buflen)
          break;
        if(needed >= buflen)
        {
          // C99 behaviour: the return value is the length required.
          buflen=needed+1;
        }else
        {
          // Older behaviour (negative on truncation) or a real error:
          // grow step by step, but not without bound.
          if(buflen >= 0x4000000)
            G_THROW( ERR_MSG("GString.overwrite") );
          buflen+=32768;
        }
        gbuffer.resize(0);
        gbuffer.resize(buflen);
      }
      va_end(args);
#else
      buffer[buflen-1] = 0;
      vsprintf(buffer, fmt, args);
      va_end(args);
      if (buffer[buflen-1])
      {
        // This isn't as fatal since it is on the stack, but we
        // definitely should stop the current operation.
        G_THROW( ERR_MSG("GString.overwrite") );
      }
#endif
      retval=strdup((const char *)buffer);
    }
  }
  // Go altering the string
  return retval;
}
// Returns the index of the first occurrence of c at or after position
// `from`, or -1 when there is none.  A negative `from` counts from the end
// of the string; a position outside the string yields -1.
int
GStringRep::search(char c, int from) const
{
  const int begin=(from<0)?(from+size):from;
  if (begin<0 || begin>=size)
    return (-1);
  char const * const hit = strchr(data+begin,c);
  return hit?(int)((size_t)hit-(size_t)data):(-1);
}
// Returns the index of the first occurrence of the string ptr at or after
// position `from`, or -1 when there is none.  A negative `from` counts
// from the end; if it still lies before the start, "GString.bad_subscript"
// is thrown.
int
GStringRep::search(char const *ptr, int from) const
{
  if(from<0)
  {
    from+=size;
    if(from<0)
      G_THROW( ERR_MSG("GString.bad_subscript") );
  }
  if (!(from>=0 && from<size))
    return (-1);
  char const * const hit = strstr(data+from,ptr);
  if(!hit)
    return (-1);
  return (int)((size_t)hit-(size_t)data);
}
// Returns the index of the last occurrence of c located at or after
// position `from`, or -1 when there is none.  A negative `from` counts
// from the end; if it still lies before the start, "GString.bad_subscript"
// is thrown.
int
GStringRep::rsearch(char c, int from) const
{
  if(from<0)
  {
    from+=size;
    if(from<0)
      G_THROW( ERR_MSG("GString.bad_subscript") );
  }
  if (!((from>=0) && (from<size)))
    return (-1);
  char const * const hit = strrchr(data+from,c);
  if(!hit)
    return (-1);
  return (int)((size_t)hit-(size_t)data);
}
// Returns the index of the last occurrence of the string ptr that starts
// at or after position `from`, or -1 when there is none.  A negative
// `from` counts from the end; if it still lies before the start,
// "GString.bad_subscript" is thrown.
int
GStringRep::rsearch(char const *ptr, int from) const
{
  if(from<0)
  {
    from+=size;
    if(from<0)
      G_THROW( ERR_MSG("GString.bad_subscript") );
  }
  // Walk forward from match to match and remember the last one seen.
  int last=(-1);
  int loc=search(ptr,from);
  while(loc >= 0)
  {
    last=loc;
    loc=search(ptr,loc+1);
  }
  return last;
}
// Returns the index of the first character at or after position `from`
// that belongs to the set `accept`, or -1 when there is none (also when
// `accept` is null or empty).  A negative `from` counts from the end; if
// it still lies before the start, "GString.bad_subscript" is thrown.
int
GStringRep::contains(const char accept[],int from) const
{
  if(from<0)
  {
    from+=size;
    if(from<0)
      G_THROW( ERR_MSG("GString.bad_subscript") );
  }
  if (!(accept && accept[0] && from>=0 && from<size))
    return (-1);
  char const * const base = data+from;
  char const * const hit=strpbrk(base,accept);
  return hit?((int)(hit-base)+from):(-1);
}
// Returns the index of the last character at or after position `from`
// that belongs to the set `accept`, or -1 when there is none.
int
GStringRep::rcontains(const char accept[],int from) const
{
  int last=(-1);
  // Repeat the forward search just past each hit until it fails.
  for(int hit=contains(accept,from); hit >= 0; hit=contains(accept,hit+1))
    last=hit;
  return last;
}
// Tells whether the string reads as an integer: after toLong() has parsed
// from position 0, only whitespace may remain.  A null string is not an
// integer.  If toLong() reports a negative end position, the answer stays
// true.
bool
GBaseString::is_int(void) const
{
  if(!ptr)
    return false;
  int endpos;
  (*this)->toLong(0,endpos);
  if(endpos < 0)
    return true;
  return ((*this)->nextNonSpace(endpos) == (int)length());
}
// Tells whether the string reads as a floating point number: after
// toDouble() has parsed from position 0, only whitespace may remain.
// A null string is not a number.  If toDouble() reports a negative end
// position, the answer stays true.
bool
GBaseString::is_float(void) const
{
  if(!ptr)
    return false;
  int endpos;
  (*this)->toDouble(0,endpos);
  if(endpos < 0)
    return true;
  return ((*this)->nextNonSpace(endpos) == (int)length());
}
// Hash function for strings: folds every byte of the string into the
// running value with a shift-and-xor step.
unsigned int
hash(const GBaseString &str)
{
  unsigned int x = 0;
  for(const char *s = (const char*)str; *s; ++s)
    x = x ^ (x<<6) ^ (unsigned char)(*s);
  return x;
}
// Raises the "GString.bad_subscript" error; shared by the subscript checks.
void
GBaseString::throw_illegal_subscript()
{
G_THROW( ERR_MSG("GString.bad_subscript") );
}
// Writes the code point w0 at ptr in this representation's encoding, which
// for UTF8 is simply UCS4toUTF8().  The mbstate_t argument is unused here.
// Returns the pointer just past the bytes written.
unsigned char *
GStringRep::UTF8::UCS4toString(
const unsigned long w0,unsigned char *ptr, mbstate_t *) const
{
return UCS4toUTF8(w0,ptr);
}
// Copies this UTF-8 string into the wide character buffer buf, which holds
// buflen elements, and zero-terminates it.
//
// Returns the number of wchar_t stored (terminator excluded), 0 for an
// empty string, or -1 when buf is null, buflen is 0, or the converted
// text plus terminator does not fit.
int
GStringRep::UTF8::ncopy(
wchar_t * const buf, const int buflen ) const
{
int retval=(-1);
if(buf && buflen)
{
buf[0]=0;
if(data[0])
{
const size_t length=strlen(data);
const unsigned char * const eptr=(const unsigned char *)(data+length);
wchar_t *r=buf;
wchar_t const * const rend=buf+buflen;
for(const unsigned char *s=(const unsigned char *)data;(r<rend)&&(s<eptr)&&*s;)
{
const unsigned long w0=UTF8toUCS4(s,eptr);
unsigned short w1;
unsigned short w2=1;
// When wchar_t is as wide as unsigned short, the code point is split
// into one or two UTF-16 units; otherwise it is stored directly.
for(int count=(sizeof(wchar_t) == sizeof(w1))?UCS4toUTF16(w0,w1,w2):1;
count&&(r<rend);
--count,w1=w2,++r)
{
r[0]=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
}
}
// Only report success when there is still room for the terminator.
if(r<rend)
{
r[0]=0;
retval=((size_t)r-(size_t)buf)/sizeof(wchar_t);
}
}else
{
retval=0;
}
}
return retval;
}
// Converts this UTF-8 string to the native multibyte encoding.
//
// A character that UCS4toNative() cannot write is handled according to
// `escape`:
//  - IS_ESCAPED: it is emitted as a decimal reference "&#NNN;".
//  - otherwise:  the conversion is abandoned and the result is empty.
// Decoding also stops at the first position where UTF8toUCS4() makes no
// progress (for instance a multibyte sequence cut off by the end of the
// string): with IS_ESCAPED the text converted so far is kept, otherwise
// the result is empty.
GP<GStringRep>
GStringRep::UTF8::toNative(const EscapeMode escape) const
{
  GP<GStringRep> retval;
  if(data[0])
  {
    const size_t length=strlen(data);
    const unsigned char * const eptr=(const unsigned char *)(data+length);
    unsigned char *buf;
    // One input byte can expand to a 13 character escape: the decoder's
    // error codes are ten-digit numbers, giving "&#4294967xxx;".  The
    // original factor of 12 was too small for a run of such bytes.
    GPBuffer<unsigned char> gbuf(buf,16*length+16);
    unsigned char *r=buf;
    mbstate_t ps;
    memset(&ps,0,sizeof(mbstate_t));
    for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
    {
      const unsigned char * const s0=s;
      const unsigned long w0=UTF8toUCS4(s,eptr);
      if(s == s0)
      {
        // The decoder consumed nothing; going on would loop forever and
        // write past the buffer.
        if(escape != IS_ESCAPED)
          r=buf;
        break;
      }
      const unsigned char * const r0=r;
      r=UCS4toNative(w0,r,&ps);
      if(r == r0)
      {
        if(escape == IS_ESCAPED)
        {
          sprintf((char *)r,"&#%lu;",w0);
          r+=strlen((char *)r);
        }else
        {
          r=buf;
          break;
        }
      }
    }
    r[0]=0;
    retval = NATIVE_CREATE( (const char *)buf );
  } else
  {
    retval = NATIVE_CREATE( (unsigned int)0 );
  }
  return retval;
}
// This representation already is UTF-8, so it returns itself.  Unless
// `nothrow` is set, the request is treated as an error and
// "GStringRep.UTF8ToUTF8" is thrown.
GP<GStringRep>
GStringRep::UTF8::toUTF8(const bool nothrow) const
{
  if(!nothrow)
  {
    G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") );
  }
  GStringRep::UTF8 * const self=const_cast<GStringRep::UTF8 *>(this);
  return self;
}
// Tests if a string is legally encoded in the current character set.
// The string is decoded character by character; it is reported invalid
// as soon as UTF8toUCS4() fails to advance.  An empty string is valid.
bool
GStringRep::UTF8::is_valid(void) const
{
  if(!(data && size))
    return true;
  const unsigned char * const limit=(const unsigned char *)(data+size);
  const unsigned char *cursor=(const unsigned char *)data;
  while((cursor<limit) && *cursor)
  {
    const unsigned char * const before=cursor;
    (void)UTF8toUCS4(cursor,limit);
    if(cursor == before)
      return false;
  }
  return true;
}
// Appends the six payload bits of the UTF-8 continuation byte at r to the
// partially decoded value U.  Returns 0 when that byte is not of the
// form 10xxxxxx.
static inline unsigned long
add_char(unsigned long const U, unsigned char const * const r)
{
  unsigned long const next=r[0];
  if((next&0xc0) != 0x80)
    return 0;
  return (U<<6)|(next&0x3f);
}
// Decodes one UTF-8 encoded character starting at s, never reading at or
// beyond eptr.
//
// Outcomes:
//  - success: returns the code point and moves s past the sequence.
//    Sequences of one to six bytes are accepted.
//  - a byte that should continue the sequence is not of the form
//    10xxxxxx, or the lead byte is a bare continuation byte or has bit
//    0x02 set: returns (unsigned int)(-1) minus the lead byte and moves s
//    forward by one byte.
//  - the sequence is cut off by eptr, or the decoded value is zero:
//    returns 0 and leaves s unchanged.
unsigned long
GStringRep::UTF8toUCS4(
unsigned char const *&s,void const * const eptr)
{
unsigned long U=0;
unsigned char const *r=s;
if(r < eptr)
{
unsigned long const C1=r++[0];
// Each level below consumes one continuation byte.  The next lower bit
// of the lead byte C1 (0x20, 0x10, 0x08, 0x04) says whether yet another
// byte follows; otherwise the lead byte's marker bits are masked off.
if(C1&0x80)
{
if(r < eptr)
{
U=C1;
// A lead byte without bit 0x40 is itself a continuation byte: error.
if((U=((C1&0x40)?add_char(U,r++):0)))
{
if(C1&0x20)
{
if(r < eptr)
{
if((U=add_char(U,r++)))
{
if(C1&0x10)
{
if(r < eptr)
{
if((U=add_char(U,r++)))
{
if(C1&0x8)
{
if(r < eptr)
{
if((U=add_char(U,r++)))
{
if(C1&0x4)
{
if(r < eptr)
{
// Six byte form; lead bytes with bit 0x02 set are rejected.
if((U=((!(C1&0x2))?(add_char(U,r++)&0x7fffffff):0)))
{
s=r;
}else
{
U=(unsigned int)(-1)-s++[0];
}
}else
{
U=0;
}
// Five byte form.
}else if((U=((U&0x4000000)?0:(U&0x3ffffff))))
{
s=r;
}
}else
{
U=(unsigned int)(-1)-s++[0];
}
}else
{
U=0;
}
// Four byte form.
}else if((U=((U&0x200000)?0:(U&0x1fffff))))
{
s=r;
}
}else
{
U=(unsigned int)(-1)-s++[0];
}
}else
{
U=0;
}
// Three byte form.
}else if((U=((U&0x10000)?0:(U&0xffff))))
{
s=r;
}
}else
{
U=(unsigned int)(-1)-s++[0];
}
}else
{
U=0;
}
// Two byte form.
}else if((U=((U&0x800)?0:(U&0x7ff))))
{
s=r;
}
}else
{
U=(unsigned int)(-1)-s++[0];
}
}else
{
U=0;
}
// Plain single byte; a zero byte is not consumed.
}else if((U=C1))
{
s=r;
}
}
return U;
}
// Writes the UTF-8 encoding of the code point w at ptr and returns the
// pointer just past the last byte written.  Between one and six bytes are
// produced for values up to 0x7FFFFFFF; anything larger is written as '?'.
unsigned char *
GStringRep::UCS4toUTF8(const unsigned long w,unsigned char *ptr)
{
  int trailing;        // Number of continuation bytes after the lead byte
  unsigned long lead;  // Marker bits of the lead byte
  if(w <= 0x7f)
  {
    *ptr++ = (unsigned char)w;
    return ptr;
  }
  else if(w <= 0x7ff)      { trailing=1; lead=0xC0; }
  else if(w <= 0xFFFF)     { trailing=2; lead=0xE0; }
  else if(w <= 0x1FFFFF)   { trailing=3; lead=0xF0; }
  else if(w <= 0x3FFFFFF)  { trailing=4; lead=0xF8; }
  else if(w <= 0x7FFFFFFF) { trailing=5; lead=0xFC; }
  else
  {
    *ptr++ = '?';
    return ptr;
  }
  // Lead byte: marker plus the topmost payload bits.
  *ptr++ = (unsigned char)((w>>(6*trailing))|lead);
  // Continuation bytes: 10xxxxxx, most significant group first.
  for(int shift=6*(trailing-1); shift >= 0; shift-=6)
    *ptr++ = (unsigned char)(((w>>shift)&0x3F)|0x80);
  return ptr;
}
// Creates with a concat operation: the C string s1 followed by s2, with
// s2 first brought to this representation's type through toThis().
// A null or empty part is skipped; the result is null when both are.
GP<GStringRep>
GStringRep::concat( const char *s1, const GP<GStringRep> &s2) const
{
  GP<GStringRep> retval;
  if(s2)
    retval=toThis(s2);
  const bool have_prefix=(s1 && s1[0]);
  if(have_prefix)
  {
    if(retval)
      retval=concat(s1,retval->data);
    else
      retval=strdup(s1);
  }
  return retval;
}
// Creates with a concat operation: s1, brought to this representation's
// type through toThis(), followed by the C string s2.
// A null or empty part is skipped; the result is null when both are.
GP<GStringRep>
GStringRep::concat( const GP<GStringRep> &s1,const char *s2) const
{
  GP<GStringRep> retval;
  if(s1)
    retval=toThis(s1);
  const bool have_suffix=(s2 && s2[0]);
  if(have_suffix)
  {
    if(retval)
      retval=retval->append(s2);
    else
      retval=strdup(s2);
  }
  return retval;
}
// Creates with a concat operation on two representations.  Both parts go
// through toThis() before being joined; a null part is skipped.
GP<GStringRep>
GStringRep::concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const
{
  GP<GStringRep> retval;
  if(!s1)
  {
    // Only the second part may contribute.
    if(s2)
      retval=toThis(s2);
    return retval;
  }
  retval=toThis(s1,s2);
  if(retval && s2)
    retval=retval->append(toThis(s2));
  return retval;
}
#ifdef WIN32
// Returns the current locale name.  The locale is queried once, on the
// first call; when that query fails or reports plain "C", the locale is
// switched to the environment's default with setlocale(LC_ALL,"").
static const char *setlocale_win32(void)
{
static const char *locale=setlocale(LC_ALL,0);
if(! locale || (locale[0] == 'C' && !locale[1]))
{
locale=setlocale(LC_ALL,"");
}
return locale;
}
#endif
// Constructs an empty representation (no buffer, size 0).
GStringRep::GStringRep(void)
{
#ifdef WIN32
// The static local exists only so that setlocale_win32() runs once, when
// the first representation is constructed; its value is not used.
static const char *locale=setlocale_win32();
#endif
size=0;
data=0;
}
// Releases the character buffer.  Its first byte is cleared before the
// memory is handed back to ::operator delete.
GStringRep::~GStringRep()
{
  if(data)
  {
    data[0]=0;
    ::operator delete(data);
    data=0;
  }
}
// The UTF8 representation adds nothing to construct or destroy.
GStringRep::UTF8::UTF8(void) {}
GStringRep::UTF8::~UTF8() {}
// Compares this string's characters with the C string s1, forwarding to
// the static three-argument cmp() with `data` as the left operand.
int
GStringRep::cmp(const char *s1,const int len) const
{
return cmp(data,s1,len);
}
// Compares two C strings, treating a null pointer like an empty string.
//   len == 0 : always equal (returns 0).
//   len  > 0 : at most len characters are compared (strncmp).
//   len  < 0 : the whole strings are compared (strcmp).
// An empty string orders before any non-empty one.
int
GStringRep::cmp(const char *s1, const char *s2,const int len)
{
  if(!len)
    return 0;
  const bool empty1=!(s1&&s1[0]);
  const bool empty2=!(s2&&s2[0]);
  if(empty1)
    return empty2?0:(-1);
  if(empty2)
    return 1;
  return (len>0)?strncmp(s1,s2,len):strcmp(s1,s2);
}
// Compares two representations, either of which may be null.  A non-null
// s1 decides through its own cmp(); otherwise s2's characters are
// compared against a null left operand.
int
GStringRep::cmp(const GP<GStringRep> &s1, const GP<GStringRep> &s2,
  const int len )
{
  if(s1)
    return s1->cmp(s2,len);
  return cmp(0,(s2?(s2->data):0),len);
}
// Compares a representation (possibly null) with a C string by comparing
// the raw characters; a null s1 is passed on as a null pointer.
int
GStringRep::cmp(const GP<GStringRep> &s1, const char *s2,
const int len )
{
return cmp((s1?s1->data:0),s2,len);
}
// Compares a C string with a representation (possibly null) by comparing
// the raw characters; a null s2 is passed on as a null pointer.
int
GStringRep::cmp(const char *s1, const GP<GStringRep> &s2,
const int len )
{
return cmp(s1,(s2?(s2->data):0),len);
}
// Compares this UTF-8 string with s2.
//  - s2 null: compared against a null string.
//  - s2 not native: the raw characters are compared.
//  - s2 native: s2 is converted to UTF-8 first.  If that conversion gives
//    nothing, this string is converted to native instead, s2 performs the
//    comparison, and the sign of its result is flipped.
int
GStringRep::UTF8::cmp(const GP<GStringRep> &s2,const int len) const
{
  if(!s2)
    return GStringRep::cmp(data,0,len);
  if(!s2->isNative())
    return GStringRep::cmp(data,s2->data,len);
  GP<GStringRep> r(s2->toUTF8(true));
  if(r)
    return GStringRep::cmp(data,r->data,len);
  return -(s2->cmp(toNative(NOT_ESCAPED),len));
}
// Parses the string from its beginning with toLong() and returns the
// value converted to int.  The end position is discarded.
int
GStringRep::UTF8::toInt() const
{
  int ignored_endpos;
  const long value=toLong(0,ignored_endpos);
  return (int)value;
}
// strtol() run with LC_NUMERIC set to "C" for the duration of the call.
// Leading blanks are skipped before parsing.
static inline long
Cstrtol(char *data,char **edata, const int base)
{
  GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
  if(data)
  {
    while(*data==' ')
      ++data;
  }
  return strtol(data,edata,base);
}
long
GStringRep::UTF8::toLong(
const int pos, int &endpos, const int base) const
{
char *edata=0;
long retval=Cstrtol(data+pos,&edata, base);
if(edata)
{
endpos=edata-data;
}else
{
endpos=(-1);
GP<GStringRep> ptr=ptr->strdup(data+pos);
if(ptr)
ptr=ptr->toNative(NOT_ESCAPED);
if(ptr)
{
int xendpos;
retval=ptr->toLong(0,xendpos,base);
if(xendpos> 0)
{
endpos=(int)size;
ptr=ptr->strdup(data+xendpos);
if(ptr)
{
ptr=ptr->toUTF8(true);
if(ptr)
{
endpos-=(int)(ptr->size);
}
}
}
}
}
return retval;
}
// strtoul() run with LC_NUMERIC set to "C" for the duration of the call.
// Leading blanks are skipped before parsing.
static inline unsigned long
Cstrtoul(char *data,char **edata, const int base)
{
  GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
  if(data)
  {
    while(*data==' ')
      ++data;
  }
  return strtoul(data,edata,base);
}
unsigned long
GStringRep::UTF8::toULong(
const int pos, int &endpos, const int base) const
{
char *edata=0;
unsigned long retval=Cstrtoul(data+pos,&edata, base);
if(edata)
{
endpos=edata-data;
}else
{
endpos=(-1);
GP<GStringRep> ptr=ptr->strdup(data+pos);
if(ptr)
ptr=ptr->toNative(NOT_ESCAPED);
if(ptr)
{
int xendpos;
retval=ptr->toULong(0,xendpos,base);
if(xendpos> 0)
{
endpos=(int)size;
ptr=ptr->strdup(data+xendpos);
if(ptr)
{
ptr=ptr->toUTF8(true);
if(ptr)
{
endpos-=(int)(ptr->size);
}
}
}
}
}
return retval;
}
// strtod() run with LC_NUMERIC set to "C" for the duration of the call.
// Leading blanks are skipped before parsing.
static inline double
Cstrtod(char *data,char **edata)
{
  GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
  if(data)
  {
    while(*data==' ')
      ++data;
  }
  return strtod(data,edata);
}
double
GStringRep::UTF8::toDouble(const int pos, int &endpos) const
{
char *edata=0;
double retval=Cstrtod(data+pos,&edata);
if(edata)
{
endpos=edata-data;
}else
{
endpos=(-1);
GP<GStringRep> ptr=ptr->strdup(data+pos);
if(ptr)
ptr=ptr->toNative(NOT_ESCAPED);
if(ptr)
{
int xendpos;
retval=ptr->toDouble(0,xendpos);
if(xendpos >= 0)
{
endpos=(int)size;
ptr=ptr->strdup(data+xendpos);
if(ptr)
{
ptr=ptr->toUTF8(true);
if(ptr)
{
endpos-=(int)(ptr->size);
}
}
}
}
}
return retval;
}
// Reads the character starting at byte offset `from` into w and returns
// the offset that follows it.
//  - from at or past the end: w is 0 and `size` is returned.
//  - from negative: w is (unsigned int)(-1) and -1 is returned.
int
GStringRep::getUCS4(unsigned long &w, const int from) const
{
  if(from>=size)
  {
    w=0;
    return size;
  }
  if(from<0)
  {
    w=(unsigned int)(-1);
    return (-1);
  }
  const char *cursor=data+from;
  w=getValidUCS4(cursor);
  return (int)((size_t)cursor-(size_t)data);
}
// Decodes the UTF-8 character at `source` with UTF8toUCS4(), bounded by
// the end of this string's buffer; `source` is advanced by the decoder.
unsigned long
GStringRep::UTF8::getValidUCS4(const char *&source) const
{
  void const * const limit=data+size;
  return GStringRep::UTF8toUCS4((const unsigned char *&)source,limit);
}
// Forwards to nextCharType() with the giswspace predicate and `true` as
// the last argument.
// NOTE(review): presumably returns the offset of the first non-whitespace
// character in [from, from+len) -- confirm against nextCharType().
int
GStringRep::nextNonSpace(const int from,const int len) const
{
return nextCharType(giswspace,from,len,true);
}
// Forwards to nextCharType() with the giswspace predicate and `false` as
// the last argument.
// NOTE(review): presumably returns the offset of the first whitespace
// character in [from, from+len) -- confirm against nextCharType().
int
GStringRep::nextSpace(const int from,const int len) const
{
return nextCharType(giswspace,from,len,false);
}
// Returns the byte offset reached after getValidUCS4() has decoded the
// character that starts at offset `from`.
int
GStringRep::nextChar(const int from) const
{
  char const *cursor=data+from;
  (void)getValidUCS4(cursor);
  const size_t offset=(size_t)cursor-(size_t)data;
  return (int)offset;
}
int
GStringRep::firstEndSpace(int from,const int len) const
{
const int xsize=(len<0)?size:(from+len);
const int ysize=(size<xsize)?size:xsize;
int retval=ysize;
while(from<ysize)
{
from=nextNonSpace(from,ysize-from);
if(from < size)
{
const int r=nextSpace(from,ysize-from);
// If a character isn't legal, then it will return
// tru for both nextSpace and nextNonSpace.
if(r == from)
{
from++;
}else
{
from=retval=r;
}
}
}
return retval;
}
// Encodes the code point w as UTF-16.  Values below 0x10000 give a single
// unit in w1 (w2 is set to 0); larger values give a surrogate pair in
// w1, w2.  Returns the number of units produced (1 or 2).
int
GStringRep::UCS4toUTF16(
  const unsigned long w,unsigned short &w1, unsigned short &w2)
{
  if(w >= 0x10000)
  {
    w1=(unsigned short)((((w-0x10000)>>10)&0x3ff)+0xD800);
    w2=(unsigned short)((w&0x3ff)+0xDC00);
    return 2;
  }
  w1=(unsigned short)w;
  w2=0;
  return 1;
}
// Decodes one character from the UTF-16 units at s, not reading at or
// beyond eptr, and stores it in U.
//
// Returns:
//   1  a single unit was decoded.
//   2  a surrogate pair was decoded.
//  -1  a high surrogate is followed by something that is not a low
//      surrogate (U is 0).
//   0  nothing was decoded: no room before eptr, the unit is zero, or
//      the first unit is a stray low surrogate (U is 0).
int
GStringRep::UTF16toUCS4(
  unsigned long &U,unsigned short const * const s,void const * const eptr)
{
  int retval=0;
  U=0;
  unsigned short const * const r=s+1;
  if(r <= eptr)
  {
    unsigned long const W1=s[0];
    if((W1<0xD800)||(W1>0xDFFF))
    {
      // Not a surrogate: the unit is the code point itself.
      if((U=W1))
      {
        retval=1;
      }
    }else if(W1<=0xDBFF)
    {
      // High surrogate: a low surrogate must follow.
      unsigned short const * const rr=r+1;
      if(rr <= eptr)
      {
        unsigned long const W2=s[1];
        // The range test needs both bounds.  The original joined them
        // with "||", which is true for every value, so any unit was
        // accepted as a low surrogate.
        if((W2>=0xDC00)&&(W2<=0xDFFF))
        {
          U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff);
          retval=2;
        }else
        {
          retval=(-1);
        }
      }
    }
  }
  return retval;
}
//bcr
// Appends the single character ch: a representation is created from
// &ch with arguments (0,1) and joined to the current contents.
GUTF8String&
GUTF8String::operator+= (char ch)
{
return init(
GStringRep::UTF8::create((const char*)*this,
GStringRep::UTF8::create(&ch,0,1)));
}
// Appends the C string str to this string and returns *this.
GUTF8String&
GUTF8String::operator+= (const char *str)
{
return init(GStringRep::UTF8::create(*this,str));
}
// Appends the string str to this string and returns *this.
GUTF8String&
GUTF8String::operator+= (const GBaseString &str)
{
return init(GStringRep::UTF8::create(*this,str));
}
// Returns the substring described by (from, len), built with the
// GUTF8String(const GBaseString &, int, int) constructor.
GUTF8String
GUTF8String::substr(int from, int len) const
{ return GUTF8String(*this, from, len); }
// Concatenation operators: each returns a new UTF-8 string made of this
// string followed by s2.
GUTF8String
GUTF8String::operator+(const GBaseString &s2) const
{ return GStringRep::UTF8::create(*this,s2); }
GUTF8String
GUTF8String::operator+(const GUTF8String &s2) const
{ return GStringRep::UTF8::create(*this,s2); }
GUTF8String
GUTF8String::operator+(const char *s2) const
{ return GStringRep::UTF8::create(*this,s2); }
// Re-initialises this string with a buffer obtained for n and returns a
// pointer to its characters (0 when the string ends up null).
//  - existing string: the buffer comes from the representation's getbuf(n).
//  - null string, n > 0: a fresh representation is created for n.
//  - null string, n <= 0: the string stays null.
char *
GUTF8String::getbuf(int n)
{
  if(!ptr)
  {
    if(n>0)
      init(GStringRep::UTF8::create(n));
    else
      init(0);
  }else
  {
    init((*this)->getbuf(n));
  }
  return ptr?((*this)->data):0;
}
// Stores ch at index n.  Setting index 0 of a null string creates a
// one-character string; every other case checks the subscript and
// delegates to the representation's setat().
void
GUTF8String::setat(const int n, const char ch)
{
  if(n || ptr)
  {
    init((*this)->setat(CheckSubscript(n),ch));
  }else
  {
    init(GStringRep::UTF8::create(&ch,0,1));
  }
}
// Wraps the C string s in a UTF8 representation and returns its native
// conversion, with unconvertible characters handled according to `escape`.
GP<GStringRep>
GStringRep::UTF8ToNative( const char *s, const EscapeMode escape )
{
return GStringRep::UTF8::create(s)->toNative(escape);
}
// --- GUTF8String constructors ---------------------------------------
// Each one initialises the string from a representation created by
// GStringRep::UTF8.

// From a single character.
GUTF8String::GUTF8String(const char dat)
{ init(GStringRep::UTF8::create(&dat,0,1)); }
// From a format string and an argument list; a null format is copied as is.
GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args)
{
if (fmt.ptr)
init(fmt->vformat(args));
else
init(fmt);
}
// From zero-terminated character data of various widths.
GUTF8String::GUTF8String(const char *str)
{ init(GStringRep::UTF8::create(str)); }
GUTF8String::GUTF8String(const unsigned char *str)
{ init(GStringRep::UTF8::create((const char *)str)); }
GUTF8String::GUTF8String(const unsigned short *str)
{ init(GStringRep::UTF8::create(str,0,-1)); }
GUTF8String::GUTF8String(const unsigned long *str)
{ init(GStringRep::UTF8::create(str,0,-1)); }
// From character data with an explicit length.  A length that becomes
// negative when converted to int is passed on as -1.
GUTF8String::GUTF8String(const char *dat, unsigned int len)
{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
GUTF8String::GUTF8String(const unsigned short *dat, unsigned int len)
{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
GUTF8String::GUTF8String(const unsigned long *dat, unsigned int len)
{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
// From a portion of another string; a negative len is passed on as -1.
GUTF8String::GUTF8String(const GBaseString &gs, int from, int len)
{ init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); }
// From a number, formatted with "%d" or "%f".
GUTF8String::GUTF8String(const int number)
{ init(GStringRep::UTF8::create_format("%d",number)); }
GUTF8String::GUTF8String(const double number)
{ init(GStringRep::UTF8::create_format("%f",number)); }
// Assignment from a single character.
GUTF8String& GUTF8String::operator= (const char str)
{ return init(GStringRep::UTF8::create(&str,0,1)); }
// Assignment from a C string.
GUTF8String& GUTF8String::operator= (const char *str)
{ return init(GStringRep::UTF8::create(str)); }
// Concatenating any string with a UTF-8 string gives a UTF-8 string.
GUTF8String GBaseString::operator+(const GUTF8String &s2) const
{ return GStringRep::UTF8::create(*this,s2); }
#if HAS_WCHAR
// Native + UTF-8 gives UTF-8: a non-null native string is converted with
// toUTF8(true) before the two are joined.
GUTF8String
GNativeString::operator+(const GUTF8String &s2) const
{
  if (!ptr)
    return GStringRep::UTF8::create((*this),s2);
  return GStringRep::UTF8::create((*this)->toUTF8(true),s2);
}
#endif
// UTF-8 + native gives UTF-8: a non-null native operand is converted with
// toUTF8(true) before the two are joined.
GUTF8String
GUTF8String::operator+(const GNativeString &s2) const
{
GP<GStringRep> g = s2;
if (s2.ptr)
g = s2->toUTF8(true);
return GStringRep::UTF8::create(*this,g);
}
// C string + UTF-8 string gives a UTF-8 string.
GUTF8String
operator+(const char *s1, const GUTF8String &s2)
{ return GStringRep::UTF8::create(s1,s2); }
#if HAS_WCHAR
// C string + native string gives a native string.
GNativeString
operator+(const char *s1, const GNativeString &s2)
{ return GStringRep::Native::create(s1,s2); }
// Appends the single character ch by joining the current contents with a
// one-character, zero-terminated temporary string.
GNativeString&
GNativeString::operator+= (char ch)
{
  char s[2]={ch,0};
  return init(GStringRep::Native::create((const char*)*this, s));
}
// Appends the C string str to this string and returns *this.
GNativeString&
GNativeString::operator+= (const char *str)
{
return init(GStringRep::Native::create(*this,str));
}
// Appends the string str to this string and returns *this.
GNativeString&
GNativeString::operator+= (const GBaseString &str)
{
return init(GStringRep::Native::create(*this,str));
}
// Concatenation operators: each returns a new native string made of this
// string followed by s2.
GNativeString
GNativeString::operator+(const GBaseString &s2) const
{ return GStringRep::Native::create(*this,s2); }
GNativeString
GNativeString::operator+(const GNativeString &s2) const
{ return GStringRep::Native::create(*this,s2); }
GNativeString
GNativeString::operator+(const char *s2) const
{ return GStringRep::Native::create(*this,s2); }
// Re-initialises this string with a buffer obtained for n and returns a
// pointer to its characters (0 when the string ends up null).
//  - existing string: the buffer comes from the representation's getbuf(n).
//  - null string, n > 0: a fresh representation is created for n.
//  - null string, n <= 0: the string stays null.
char *
GNativeString::getbuf(int n)
{
  if(!ptr)
  {
    if(n>0)
      init(GStringRep::Native::create(n));
    else
      init(0);
  }else
  {
    init((*this)->getbuf(n));
  }
  return ptr?((*this)->data):0;
}
// Stores ch at index n.  Setting index 0 of a null string creates a
// one-character string; every other case checks the subscript and
// delegates to the representation's setat().
void
GNativeString::setat(const int n, const char ch)
{
  if(n || ptr)
  {
    init((*this)->setat(CheckSubscript(n),ch));
  }else
  {
    init(GStringRep::Native::create(&ch,0,1));
  }
}
#endif
#ifdef HAVE_NAMESPACES
}
# ifndef NOT_USING_DJVU_NAMESPACE
using namespace DJVU;
# endif
#endif