// tdepim/kmail/encodingdetector.h

/*
This file was taken from the KDE 4.x libraries and backported to TQt 3.
Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
Copyright (C) 2007 Nick Shaforostoff (shafff@ukr.net)
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef ENCODINGDETECTOR_H
#define ENCODINGDETECTOR_H
#include <tqstring.h>
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not required here.
class TQTextCodec;
class TQTextDecoder;
class EncodingDetectorPrivate;
/**
 * @short Guesses the character encoding of raw text data.
 *
 * Searches the raw data for an encoding declaration (meta and xml tags).
 * When no declaration is found, heuristics for the specified
 * script/language are used instead.
 *
 * If unicode BOM marks are found, the encoding is changed regardless of
 * what the user has requested.
 *
 * Intended lifetime of the object: one instance per document.
 *
 * Typical use:
 * \code
 * TQByteArray data;
 * ...
 * EncodingDetector detector;
 * detector.setAutoDetectLanguage(EncodingDetector::Cyrillic);
 * detector.analyze(data);
 * const char *name = detector.encoding();
 * \endcode
 *
 * Earlier revisions of this comment referred to decode() and
 * decodeWithBuffering(); neither is declared by this class.
 *
 * NOTE(review): the class declares a destructor and holds a raw pointer
 * to its private data, but copying is not restricted here. Confirm
 * against the implementation whether copying an instance is safe.
 */
class EncodingDetector
{
public:
    /**
     * Source that an encoding choice is attributed to.
     *
     * NOTE(review): the enumerator order may be significant to the
     * implementation (e.g. as a priority) -- confirm before reordering.
     */
    enum EncodingChoiceSource
    {
        DefaultEncoding,
        AutoDetectedEncoding,
        BOM,
        EncodingFromXMLHeader,
        EncodingFromMetaTag,
        EncodingFromHTTPHeader,
        UserChosenEncoding
    };

    /**
     * Script / language group selectable for heuristic detection.
     */
    enum AutoDetectScript
    {
        None,
        SemiautomaticDetection,
        Arabic,
        Baltic,
        CentralEuropean,
        ChineseSimplified,
        ChineseTraditional,
        Cyrillic,
        Greek,
        Hebrew,
        Japanese,
        Korean,
        NorthernSaami,
        SouthEasternEurope,
        Thai,
        Turkish,
        Unicode,
        WesternEuropean
    };

    /**
     * Creates a detector with these defaults: codec is latin1 (as the
     * html spec says), EncodingChoiceSource is default and
     * AutoDetectScript is Semiautomatic.
     */
    EncodingDetector();

    /**
     * Creates a detector with an explicit default codec,
     * EncodingChoiceSource and AutoDetectScript.
     *
     * @param codec  default codec
     * @param source encoding choice source to start with
     * @param script script used for auto-detection; None when omitted
     */
    EncodingDetector(TQTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script = None);

    ~EncodingDetector();

    // Currently commented out:
    // const TQTextCodec* codec() const;

    /**
     * Selects an encoding by name.
     *
     * @param encoding name of the encoding
     * @param type     source the choice is attributed to
     * @returns true if specified encoding was recognized
     */
    bool setEncoding(const char *encoding, EncodingChoiceSource type);

    /**
     * Convenience method.
     *
     * @returns mime name of detected encoding
     */
    const char* encoding() const;

    /**
     * NOTE(review): undocumented in the original header; presumably
     * tells whether the text is stored in visual order -- confirm
     * against the implementation.
     */
    bool visuallyOrdered() const;

    // Currently commented out (string-based variants):
    // void setAutoDetectLanguage( const TQString& );
    // const TQString& autoDetectLanguage() const;

    /**
     * Sets the script to use for auto-detection.
     */
    void setAutoDetectLanguage(AutoDetectScript script);

    /**
     * @returns the script configured for auto-detection
     */
    AutoDetectScript autoDetectLanguage() const;

    /**
     * NOTE(review): undocumented in the original header; presumably
     * returns the source of the current encoding choice -- confirm.
     */
    EncodingChoiceSource encodingChoiceSource() const;

    /**
     * Analyze text data.
     *
     * @param data pointer to the raw text bytes
     * @param len  number of bytes available at @p data
     * @returns true if there was enough data for accurate detection
     */
    bool analyze(const char *data, int len);

    /**
     * Analyze text data.
     *
     * @param data raw text bytes
     * @returns true if there was enough data for accurate detection
     */
    bool analyze(const TQByteArray &data);

    /**
     * Maps a language name to a script.
     *
     * @param lang language name _after_ it has been i18n()'ed
     */
    static AutoDetectScript scriptForName(const TQString& lang);

    /**
     * NOTE(review): undocumented in the original header; presumably the
     * counterpart of scriptForName() -- confirm.
     */
    static TQString nameForScript(AutoDetectScript script);

    /**
     * NOTE(review): undocumented in the original header; the accepted
     * language code format is not visible here -- confirm.
     */
    static AutoDetectScript scriptForLanguageCode(const TQString &lang);

    /**
     * NOTE(review): undocumented in the original header; presumably
     * tells whether heuristics exist for @p script -- confirm.
     */
    static bool hasAutoDetectionForScript(AutoDetectScript script);

protected:
    /**
     * Check if we are really utf8. Taken from kate.
     *
     * @param data   bytes to check
     * @param length number of bytes at @p data
     * @returns true if current encoding is utf8 and the text cannot be
     *          in this encoding
     *
     * Please somebody read http://de.wikipedia.org/wiki/UTF-8 and check
     * this code...
     */
    bool errorsIfUtf8(const char* data, int length);

    /**
     * @returns TQTextDecoder for detected encoding
     */
    TQTextDecoder* decoder();

private:
    // Pointer to the private data class; the pointer itself is const and
    // the type is only forward-declared in this header.
    EncodingDetectorPrivate* const d;
};
#endif