You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
612 lines
33 KiB
612 lines
33 KiB
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
<!-- /home/espenr/tmp/qt-3.3.8-espenr-2499/qt-x11-free-3.3.8/src/codecs/tqtextcodec.cpp:201 -->
|
|
<html>
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
|
<title>TQTextCodec Class</title>
|
|
<style type="text/css"><!--
|
|
.fn { margin-left: 1cm; text-indent: -1cm; }
|
|
a:link { color: #004faf; text-decoration: none }
|
|
a:visited { color: #672967; text-decoration: none }
|
|
body { background: #ffffff; color: black; }
|
|
--></style>
|
|
</head>
|
|
<body>
|
|
|
|
<table border="0" cellpadding="0" cellspacing="0" width="100%">
|
|
<tr bgcolor="#E5E5E5">
|
|
<td valign="middle">
|
|
<a href="index.html">
|
|
<font color="#004faf">Home</font></a>
|
|
| <a href="classes.html">
|
|
<font color="#004faf">All Classes</font></a>
|
|
| <a href="mainclasses.html">
|
|
<font color="#004faf">Main Classes</font></a>
|
|
| <a href="annotated.html">
|
|
<font color="#004faf">Annotated</font></a>
|
|
| <a href="groups.html">
|
|
<font color="#004faf">Grouped Classes</font></a>
|
|
| <a href="functions.html">
|
|
<font color="#004faf">Functions</font></a>
|
|
</td>
|
|
<td align="right" valign="middle"><img src="logo32.png" alt="" align="right" width="64" height="32" border="0"></td></tr></table><h1 align="center">TQTextCodec Class Reference</h1>
|
|
|
|
<p>The TQTextCodec class provides conversion between text encodings.
|
|
<a href="#details">More...</a>
|
|
<p>Almost all the functions in this class are <a href="threads.html#reentrant">reentrant</a> when TQt is built with thread support. The exceptions are <a href="#~TQTextCodec"><b>~TQTextCodec</b></a>(), <a href="#setCodecForTr"><b>setCodecForTr</b></a>(), <a href="#setCodecForCStrings"><b>setCodecForCStrings</b></a>(), and <a href="#TQTextCodec"><b>TQTextCodec</b></a>().
|
|
</p><p><tt>#include &lt;<a href="tqtextcodec-h.html">tqtextcodec.h</a>&gt;</tt>
|
|
<p>Inherited by <a href="tqbig5codec.html">TQBig5Codec</a>, <a href="tqbig5hkscscodec.html">TQBig5hkscsCodec</a>, <a href="tqeucjpcodec.html">TQEucJpCodec</a>, <a href="tqeuckrcodec.html">TQEucKrCodec</a>, <a href="tqgb18030codec.html">TQGb18030Codec</a>, <a href="tqjiscodec.html">TQJisCodec</a>, <a href="tqhebrewcodec.html">TQHebrewCodec</a>, <a href="tqsjiscodec.html">TQSjisCodec</a>, and <a href="tqtsciicodec.html">TQTsciiCodec</a>.
|
|
<p><a href="tqtextcodec-members.html">List of all member functions.</a>
|
|
<h2>Public Members</h2>
|
|
<ul>
|
|
<li class=fn>virtual <a href="#~TQTextCodec"><b>~TQTextCodec</b></a> ()</li>
|
|
<li class=fn>virtual const char * <a href="#name"><b>name</b></a> () const = 0</li>
|
|
<li class=fn>virtual const char * <a href="#mimeName"><b>mimeName</b></a> () const</li>
|
|
<li class=fn>virtual int <a href="#mibEnum"><b>mibEnum</b></a> () const = 0</li>
|
|
<li class=fn>virtual TQTextDecoder * <a href="#makeDecoder"><b>makeDecoder</b></a> () const</li>
|
|
<li class=fn>virtual TQTextEncoder * <a href="#makeEncoder"><b>makeEncoder</b></a> () const</li>
|
|
<li class=fn>virtual TQString <a href="#toUnicode"><b>toUnicode</b></a> ( const char * chars, int len ) const</li>
|
|
<li class=fn>virtual TQCString <a href="#fromUnicode"><b>fromUnicode</b></a> ( const TQString & uc, int & lenInOut ) const</li>
|
|
<li class=fn>TQCString <a href="#fromUnicode-2"><b>fromUnicode</b></a> ( const TQString & uc ) const</li>
|
|
<li class=fn>TQString <a href="#toUnicode-2"><b>toUnicode</b></a> ( const TQByteArray & a, int len ) const</li>
|
|
<li class=fn>TQString <a href="#toUnicode-3"><b>toUnicode</b></a> ( const TQByteArray & a ) const</li>
|
|
<li class=fn>TQString <a href="#toUnicode-4"><b>toUnicode</b></a> ( const TQCString & a, int len ) const</li>
|
|
<li class=fn>TQString <a href="#toUnicode-5"><b>toUnicode</b></a> ( const TQCString & a ) const</li>
|
|
<li class=fn>TQString <a href="#toUnicode-6"><b>toUnicode</b></a> ( const char * chars ) const</li>
|
|
<li class=fn>virtual bool <a href="#canEncode"><b>canEncode</b></a> ( TQChar ch ) const</li>
|
|
<li class=fn>virtual bool <a href="#canEncode-2"><b>canEncode</b></a> ( const TQString & s ) const</li>
|
|
<li class=fn>virtual int <a href="#heuristicContentMatch"><b>heuristicContentMatch</b></a> ( const char * chars, int len ) const = 0</li>
|
|
<li class=fn>virtual int <a href="#heuristicNameMatch"><b>heuristicNameMatch</b></a> ( const char * hint ) const</li>
|
|
</ul>
|
|
<h2>Static Public Members</h2>
|
|
<ul>
|
|
<li class=fn>TQTextCodec * <a href="#loadCharmap"><b>loadCharmap</b></a> ( TQIODevice * iod )</li>
|
|
<li class=fn>TQTextCodec * <a href="#loadCharmapFile"><b>loadCharmapFile</b></a> ( TQString filename )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForMib"><b>codecForMib</b></a> ( int mib )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForName"><b>codecForName</b></a> ( const char * name, int accuracy = 0 )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForContent"><b>codecForContent</b></a> ( const char * chars, int len )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForIndex"><b>codecForIndex</b></a> ( int i )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForLocale"><b>codecForLocale</b></a> ()</li>
|
|
<li class=fn>void <a href="#setCodecForLocale"><b>setCodecForLocale</b></a> ( TQTextCodec * c )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForTr"><b>codecForTr</b></a> ()</li>
|
|
<li class=fn>void <a href="#setCodecForTr"><b>setCodecForTr</b></a> ( TQTextCodec * c )</li>
|
|
<li class=fn>TQTextCodec * <a href="#codecForCStrings"><b>codecForCStrings</b></a> ()</li>
|
|
<li class=fn>void <a href="#setCodecForCStrings"><b>setCodecForCStrings</b></a> ( TQTextCodec * c )</li>
|
|
<li class=fn>void <a href="#deleteAllCodecs"><b>deleteAllCodecs</b></a> ()</li>
|
|
<li class=fn>const char * <a href="#locale"><b>locale</b></a> ()</li>
|
|
</ul>
|
|
<h2>Protected Members</h2>
|
|
<ul>
|
|
<li class=fn><a href="#TQTextCodec"><b>TQTextCodec</b></a> ()</li>
|
|
</ul>
|
|
<h2>Static Protected Members</h2>
|
|
<ul>
|
|
<li class=fn>int <a href="#simpleHeuristicNameMatch"><b>simpleHeuristicNameMatch</b></a> ( const char * name, const char * hint )</li>
|
|
</ul>
|
|
<hr><a name="details"></a><h2>Detailed Description</h2>
|
|
|
|
|
|
The TQTextCodec class provides conversion between text encodings.
|
|
|
|
|
|
<p> TQt uses Unicode to store, draw and manipulate strings. In many
|
|
situations you may wish to deal with data that uses a different
|
|
encoding. For example, most Japanese documents are still stored in
|
|
Shift-JIS or ISO2022, while Russian users often have their
|
|
documents in KOI8-R or CP1251.
|
|
<p> TQt provides a set of TQTextCodec classes to help with converting
|
|
non-Unicode formats to and from Unicode. You can also create your
|
|
own codec classes (<a href="#subclassing">see later</a>).
|
|
<p> The supported encodings are:
|
|
<ul>
|
|
<li> Latin1
|
|
<li> Big5 -- Chinese
|
|
<li> Big5-HKSCS -- Chinese
|
|
<li> eucJP -- Japanese
|
|
<li> eucKR -- Korean
|
|
<li> GB2312 -- Chinese
|
|
<li> GBK -- Chinese
|
|
<li> GB18030 -- Chinese
|
|
<li> JIS7 -- Japanese
|
|
<li> Shift-JIS -- Japanese
|
|
<li> TSCII -- Tamil
|
|
<li> utf8 -- Unicode, 8-bit
|
|
<li> utf16 -- Unicode
|
|
<li> KOI8-R -- Russian
|
|
<li> KOI8-U -- Ukrainian
|
|
<li> ISO8859-1 -- Western
|
|
<li> ISO8859-2 -- Central European
|
|
<li> ISO8859-3 -- South European
|
|
<li> ISO8859-4 -- Baltic
|
|
<li> ISO8859-5 -- Cyrillic
|
|
<li> ISO8859-6 -- Arabic
|
|
<li> ISO8859-7 -- Greek
|
|
<li> ISO8859-8 -- Hebrew, visually ordered
|
|
<li> ISO8859-8-i -- Hebrew, logically ordered
|
|
<li> ISO8859-9 -- Turkish
|
|
<li> ISO8859-10
|
|
<li> ISO8859-13
|
|
<li> ISO8859-14
|
|
<li> ISO8859-15 -- Western
|
|
<li> IBM 850
|
|
<li> IBM 866
|
|
<li> CP874
|
|
<li> CP1250 -- Central European
|
|
<li> CP1251 -- Cyrillic
|
|
<li> CP1252 -- Western
|
|
<li> CP1253 -- Greek
|
|
<li> CP1254 -- Turkish
|
|
<li> CP1255 -- Hebrew
|
|
<li> CP1256 -- Arabic
|
|
<li> CP1257 -- Baltic
|
|
<li> CP1258
|
|
<li> Apple Roman
|
|
<li> TIS-620 -- Thai
|
|
</ul>
|
|
<p> TQTextCodecs can be used as follows to convert some locally encoded
|
|
string to Unicode. Suppose you have some string encoded in Russian
|
|
KOI8-R encoding, and want to convert it to Unicode. The simple way
|
|
to do this is:
|
|
<p> <pre>
|
|
<a href="ntqcstring.html">TQCString</a> locallyEncoded = "..."; // text to convert
|
|
TQTextCodec *codec = TQTextCodec::<a href="#codecForName">codecForName</a>("KOI8-R"); // get the codec for KOI8-R
|
|
<a href="tqstring.html">TQString</a> unicodeString = codec-><a href="#toUnicode">toUnicode</a>( locallyEncoded );
|
|
</pre>
|
|
|
|
<p> After this, <tt>unicodeString</tt> holds the text converted to Unicode.
|
|
Converting a string from Unicode to the local encoding is just as
|
|
easy:
|
|
<p> <pre>
|
|
<a href="tqstring.html">TQString</a> unicodeString = "..."; // any Unicode text
|
|
TQTextCodec *codec = TQTextCodec::<a href="#codecForName">codecForName</a>("KOI8-R"); // get the codec for KOI8-R
|
|
<a href="ntqcstring.html">TQCString</a> locallyEncoded = codec-><a href="#fromUnicode">fromUnicode</a>( unicodeString );
|
|
</pre>
|
|
|
|
<p> Some care must be taken when trying to convert the data in chunks,
|
|
for example, when receiving it over a network. In such cases it is
|
|
possible that a multi-byte character will be split over two
|
|
chunks. At best this might result in the loss of a character and
|
|
at worst cause the entire conversion to fail.
|
|
<p> The approach to use in these situations is to create a <a href="tqtextdecoder.html">TQTextDecoder</a>
|
|
object for the codec and use this TQTextDecoder for the whole
|
|
decoding process, as shown below:
|
|
<p> <pre>
|
|
TQTextCodec *codec = TQTextCodec::<a href="#codecForName">codecForName</a>( "Shift-JIS" );
|
|
<a href="tqtextdecoder.html">TQTextDecoder</a> *decoder = codec-><a href="#makeDecoder">makeDecoder</a>();
|
|
|
|
<a href="tqstring.html">TQString</a> unicodeString;
|
|
while( receiving_data ) {
|
|
<a href="qbytearray.html">TQByteArray</a> chunk = new_data;
|
|
unicodeString += decoder-><a href="tqtextdecoder.html#toUnicode">toUnicode</a>( chunk.<a href="ntqmemarray.html#data">data</a>(), chunk.size() );
|
|
}
|
|
</pre>
|
|
|
|
<p> The TQTextDecoder object maintains state between chunks and therefore
|
|
works correctly even if a multi-byte character is split between
|
|
chunks.
|
|
<p> <a name="subclassing"></a>
|
|
<h3> Creating your own Codec class
|
|
</h3>
|
|
<a name="1"></a><p> Support for new text encodings can be added to TQt by creating
|
|
TQTextCodec subclasses.
|
|
<p> Built-in codecs can be overridden by custom codecs since more
|
|
recently created TQTextCodec objects take precedence over earlier
|
|
ones.
|
|
<p> You may find it more convenient to make your codec class available
|
|
as a plugin; see the <a href="plugins-howto.html">plugin
|
|
documentation</a> for more details.
|
|
<p> The abstract virtual functions describe the encoder to the
|
|
system and the coder is used as required in the different
|
|
text file formats supported by <a href="tqtextstream.html">TQTextStream</a>, and under X11, for the
|
|
locale-specific character input and output.
|
|
<p> To add support for another 8-bit encoding to TQt, make a subclass
|
|
of TQTextCodec and implement at least the following methods:
|
|
<p> <pre>
|
|
const char* name() const
|
|
</pre>
|
|
|
|
Return the official name for the encoding.
|
|
<p> <pre>
|
|
int mibEnum() const
|
|
</pre>
|
|
|
|
Return the MIB enum for the encoding if it is listed in the
|
|
<a href="http://www.iana.org/assignments/character-sets">IANA character-sets encoding file</a>.
|
|
<p> If the encoding is multi-byte then it will have "state"; that is,
|
|
the interpretation of some bytes will be dependent on some preceding
|
|
bytes. For such encodings, you must implement:
|
|
<p> <pre>
|
|
<a href="tqtextdecoder.html">TQTextDecoder</a>* makeDecoder() const
|
|
</pre>
|
|
|
|
Return a <a href="tqtextdecoder.html">TQTextDecoder</a> that remembers incomplete multi-byte sequence
|
|
prefixes or other required state.
|
|
<p> If the encoding does <em>not</em> require state, you should implement:
|
|
<p> <pre>
|
|
<a href="tqstring.html">TQString</a> toUnicode(const char* chars, int len) const
|
|
</pre>
|
|
|
|
Converts <em>len</em> characters from <em>chars</em> to Unicode.
|
|
<p> The base TQTextCodec class has default implementations of the above
|
|
two functions, <em>but they are mutually recursive</em>, so you must
|
|
re-implement at least one of them, or both for improved efficiency.
|
|
<p> For conversion from Unicode to 8-bit encodings, it is rarely necessary
|
|
to maintain state. However, two functions similar to the two above
|
|
are used for encoding:
|
|
<p> <pre>
|
|
<a href="tqtextencoder.html">TQTextEncoder</a>* makeEncoder() const
|
|
</pre>
|
|
|
|
Return a <a href="tqtextencoder.html">TQTextEncoder</a>.
|
|
<p> <pre>
|
|
<a href="ntqcstring.html">TQCString</a> fromUnicode(const <a href="tqstring.html">TQString</a>& uc, int& lenInOut ) const
|
|
</pre>
|
|
|
|
Converts <em>lenInOut</em> characters (of type <a href="qchar.html">TQChar</a>) from the start of
|
|
the string <em>uc</em>, returning a <a href="ntqcstring.html">TQCString</a> result, and also returning
|
|
the <a href="ntqcstring.html#length">length</a> of the result in
|
|
<em>lenInOut</em>.
|
|
<p> Again, these are mutually recursive so only one needs to be implemented,
|
|
or both if greater efficiency is possible.
|
|
<p> Finally, you must implement:
|
|
<p> <pre>
|
|
int heuristicContentMatch(const char* chars, int len) const
|
|
</pre>
|
|
|
|
Gives a value indicating how likely it is that <em>len</em> characters
|
|
from <em>chars</em> are in the encoding.
|
|
<p> A good model for this function is the
|
|
TQWindowsLocalCodec::heuristicContentMatch function found in the TQt
|
|
sources.
|
|
<p> A TQTextCodec subclass might have improved performance if you also
|
|
re-implement:
|
|
<p> <pre>
|
|
bool canEncode( <a href="qchar.html">TQChar</a> ) const
|
|
</pre>
|
|
|
|
Test if a Unicode character can be encoded.
|
|
<p> <pre>
|
|
bool canEncode( const <a href="tqstring.html">TQString</a>& ) const
|
|
</pre>
|
|
|
|
Test if a string of Unicode characters can be encoded.
|
|
<p> <pre>
|
|
int heuristicNameMatch(const char* hint) const
|
|
</pre>
|
|
|
|
Test if a possibly non-standard name is referring to the codec.
|
|
<p> Codecs can also be created as <a href="plugins-howto.html">plugins</a>.
|
|
<p>See also <a href="i18n.html">Internationalization with TQt</a>.
|
|
|
|
<hr><h2>Member Function Documentation</h2>
|
|
<h3 class=fn><a name="TQTextCodec"></a>TQTextCodec::TQTextCodec ()<tt> [protected]</tt>
|
|
</h3><p><b>Warning:</b> This function is <i>not</i> <a href="threads.html#reentrant">reentrant</a>.</p>
|
|
|
|
|
|
<p> Constructs a TQTextCodec, and gives it the highest precedence. The
|
|
TQTextCodec should always be constructed on the heap (i.e. with <tt>new</tt>). TQt takes ownership and will delete it when the application
|
|
terminates.
|
|
|
|
<h3 class=fn><a name="~TQTextCodec"></a>TQTextCodec::~TQTextCodec ()<tt> [virtual]</tt>
|
|
</h3><p><b>Warning:</b> This function is <i>not</i> <a href="threads.html#reentrant">reentrant</a>.</p>
|
|
|
|
|
|
<p> Destroys the TQTextCodec. Note that you should not delete codecs
|
|
yourself: once created they become TQt's responsibility.
|
|
|
|
<h3 class=fn>bool <a name="canEncode"></a>TQTextCodec::canEncode ( <a href="qchar.html">TQChar</a> ch ) const<tt> [virtual]</tt>
|
|
</h3>
|
|
Returns TRUE if the Unicode character <em>ch</em> can be fully encoded
|
|
with this codec; otherwise returns FALSE. The default
|
|
implementation tests if the result of <a href="#toUnicode">toUnicode</a>(fromUnicode(ch))
|
|
is the original <em>ch</em>. Subclasses may be able to improve the
|
|
efficiency.
|
|
|
|
<h3 class=fn>bool <a name="canEncode-2"></a>TQTextCodec::canEncode ( const <a href="tqstring.html">TQString</a> & s ) const<tt> [virtual]</tt>
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>s</em> contains the string being tested for encode-ability.
|
|
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForCStrings"></a>TQTextCodec::codecForCStrings ()<tt> [static]</tt>
|
|
</h3>
|
|
|
|
<p> Returns the codec used by <a href="tqstring.html">TQString</a> to convert to and from const
|
|
char* and TQCStrings. If this function returns 0 (the default),
|
|
TQString assumes Latin-1.
|
|
<p> <p>See also <a href="#setCodecForCStrings">setCodecForCStrings</a>().
|
|
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForContent"></a>TQTextCodec::codecForContent ( const char * chars, int len )<tt> [static]</tt>
|
|
</h3>
|
|
Searches all installed TQTextCodec objects, returning the one which
|
|
most recognizes the given content. May return 0.
|
|
<p> Note that this is often a poor choice, since character encodings
|
|
often use most of the available character sequences, and so only
|
|
by linguistic analysis could a true match be made.
|
|
<p> <em>chars</em> contains the string to check, and <em>len</em> contains the
|
|
number of characters in the string to use.
|
|
<p> <p>See also <a href="#heuristicContentMatch">heuristicContentMatch</a>().
|
|
|
|
<p>Example: <a href="qwerty-example.html#x391">qwerty/qwerty.cpp</a>.
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForIndex"></a>TQTextCodec::codecForIndex ( int i )<tt> [static]</tt>
|
|
</h3>
|
|
Returns the TQTextCodec <em>i</em> positions from the most recently
|
|
inserted codec, or 0 if there is no such TQTextCodec. Thus,
|
|
<a href="#codecForIndex">codecForIndex</a>(0) returns the most recently created TQTextCodec.
|
|
|
|
<p>Example: <a href="qwerty-example.html#x392">qwerty/qwerty.cpp</a>.
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForLocale"></a>TQTextCodec::codecForLocale ()<tt> [static]</tt>
|
|
</h3> Returns a pointer to the codec most suitable for this locale.
|
|
<p>Example: <a href="qwerty-example.html#x393">qwerty/qwerty.cpp</a>.
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForMib"></a>TQTextCodec::codecForMib ( int mib )<tt> [static]</tt>
|
|
</h3>
|
|
Returns the TQTextCodec which matches the <a href="#mibEnum">MIBenum</a> <em>mib</em>.
|
|
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForName"></a>TQTextCodec::codecForName ( const char * name, int accuracy = 0 )<tt> [static]</tt>
|
|
</h3>
|
|
Searches all installed TQTextCodec objects and returns the one
|
|
which best matches <em>name</em>; the match is case-insensitive. Returns
|
|
0 if no codec's <a href="#heuristicNameMatch">heuristicNameMatch</a>() reports a match better than
|
|
<em>accuracy</em>, or if <em>name</em> is a null string.
|
|
<p> <p>See also <a href="#heuristicNameMatch">heuristicNameMatch</a>().
|
|
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="codecForTr"></a>TQTextCodec::codecForTr ()<tt> [static]</tt>
|
|
</h3>
|
|
|
|
<p> Returns the codec used by <a href="tqobject.html#tr">TQObject::tr</a>() on its argument. If this
|
|
function returns 0 (the default), tr() assumes Latin-1.
|
|
<p> <p>See also <a href="#setCodecForTr">setCodecForTr</a>().
|
|
|
|
<h3 class=fn>void <a name="deleteAllCodecs"></a>TQTextCodec::deleteAllCodecs ()<tt> [static]</tt>
|
|
</h3>
|
|
Deletes all the created codecs.
|
|
<p> <b>Warning:</b> Do not call this function.
|
|
<p> <a href="ntqapplication.html">TQApplication</a> calls this function just before exiting to delete
|
|
any TQTextCodec objects that may be lying around. Since various
|
|
other classes hold pointers to TQTextCodec objects, it is not safe
|
|
to call this function earlier.
|
|
<p> If you are using the utility classes (like <a href="tqstring.html">TQString</a>) but not using
|
|
TQApplication, calling this function at the very end of your
|
|
application may be helpful for chasing down memory leaks by
|
|
eliminating any TQTextCodec objects.
|
|
|
|
<h3 class=fn><a href="ntqcstring.html">TQCString</a> <a name="fromUnicode"></a>TQTextCodec::fromUnicode ( const <a href="tqstring.html">TQString</a> & uc, int & lenInOut ) const<tt> [virtual]</tt>
|
|
</h3>
|
|
TQTextCodec subclasses must reimplement either this function or
|
|
<a href="#makeEncoder">makeEncoder</a>(). It converts the first <em>lenInOut</em> characters of <em>uc</em> from Unicode to the encoding of the subclass. If <em>lenInOut</em> is
|
|
negative or too large, the length of <em>uc</em> is used instead.
|
|
<p> Converts <em>lenInOut</em> characters (not bytes) from <em>uc</em>, producing
|
|
a <a href="ntqcstring.html">TQCString</a>. <em>lenInOut</em> will be set to the <a href="ntqcstring.html#length">length</a> of the result (in bytes).
|
|
<p> The default implementation makes an encoder with makeEncoder() and
|
|
converts the input with that. Note that the default makeEncoder()
|
|
implementation makes an encoder that simply calls this function,
|
|
hence subclasses <em>must</em> reimplement one function or the other to
|
|
avoid infinite recursion.
|
|
|
|
<p>Reimplemented in <a href="tqhebrewcodec.html#fromUnicode">TQHebrewCodec</a>.
|
|
<h3 class=fn><a href="ntqcstring.html">TQCString</a> <a name="fromUnicode-2"></a>TQTextCodec::fromUnicode ( const <a href="tqstring.html">TQString</a> & uc ) const
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>uc</em> is the Unicode source string.
|
|
|
|
<h3 class=fn>int <a name="heuristicContentMatch"></a>TQTextCodec::heuristicContentMatch ( const char * chars, int len ) const<tt> [pure virtual]</tt>
|
|
</h3>
|
|
|
|
<p> TQTextCodec subclasses must reimplement this function. It examines
|
|
the first <em>len</em> bytes of <em>chars</em> and returns a value indicating
|
|
how likely it is that the string is a prefix of text encoded in
|
|
the encoding of the subclass. A negative return value indicates
|
|
that the text is detectably not in the encoding (e.g. it contains
|
|
characters undefined in the encoding). A return value of 0
|
|
indicates that the text should be decoded with this codec rather
|
|
than as ASCII, but there is no particular evidence. The value
|
|
should range up to <em>len</em>. Thus, most decoders will return -1, 0,
|
|
or <em>len</em>.
|
|
<p> The characters are not null terminated.
|
|
<p> <p>See also <a href="#codecForContent">codecForContent</a>().
|
|
|
|
<h3 class=fn>int <a name="heuristicNameMatch"></a>TQTextCodec::heuristicNameMatch ( const char * hint ) const<tt> [virtual]</tt>
|
|
</h3>
|
|
Returns a value indicating how likely it is that this decoder is
|
|
appropriate for decoding some format that has the given name. The
|
|
name is compared with the <em>hint</em>.
|
|
<p> A good match returns a positive number around the length of the
|
|
string. A bad match is negative.
|
|
<p> The default implementation calls <a href="#simpleHeuristicNameMatch">simpleHeuristicNameMatch</a>() with
|
|
the name of the codec.
|
|
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="loadCharmap"></a>TQTextCodec::loadCharmap ( <a href="ntqiodevice.html">TQIODevice</a> * iod )<tt> [static]</tt>
|
|
</h3>
|
|
Reads a POSIX2 charmap definition from <em>iod</em>.
|
|
The parser recognizes the following lines:
|
|
<p> <font face="sans-serif">
|
|
&lt;code_set_name&gt; <i>name</i><br>
|
|
&lt;escape_char&gt; <i>character</i><br>
|
|
% alias <i>alias</i><br>
|
|
CHARMAP<br>
|
|
&lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...<br>
|
|
&lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...<br>
|
|
&lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...<br>
|
|
&lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...<br>
|
|
END CHARMAP<br>
|
|
</font>
|
|
<p> The resulting TQTextCodec is returned (and also added to the global
|
|
list of codecs). The <a href="#name">name</a>() of the result is taken from the
|
|
code_set_name.
|
|
<p> Note that a codec constructed in this way uses much more memory
|
|
and is slower than a hand-written TQTextCodec subclass, since
|
|
tables in code are kept in memory shared by all TQt applications.
|
|
<p> <p>See also <a href="#loadCharmapFile">loadCharmapFile</a>().
|
|
|
|
<p>Example: <a href="qwerty-example.html#x394">qwerty/qwerty.cpp</a>.
|
|
<h3 class=fn><a href="tqtextcodec.html">TQTextCodec</a> * <a name="loadCharmapFile"></a>TQTextCodec::loadCharmapFile ( <a href="tqstring.html">TQString</a> filename )<tt> [static]</tt>
|
|
</h3>
|
|
A convenience function for <a href="#loadCharmap">loadCharmap</a>() that loads the charmap
|
|
definition from the file <em>filename</em>.
|
|
|
|
<h3 class=fn>const char * <a name="locale"></a>TQTextCodec::locale ()<tt> [static]</tt>
|
|
</h3>
|
|
Returns a string representing the current language and
|
|
sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
|
|
|
|
<p>Example: <a href="i18n-example.html#x1949">i18n/main.cpp</a>.
|
|
<h3 class=fn><a href="tqtextdecoder.html">TQTextDecoder</a> * <a name="makeDecoder"></a>TQTextCodec::makeDecoder () const<tt> [virtual]</tt>
|
|
</h3>
|
|
Creates a <a href="tqtextdecoder.html">TQTextDecoder</a> which stores enough state to decode chunks
|
|
of char* data to create chunks of Unicode data. The default
|
|
implementation creates a stateless decoder, which is only
|
|
sufficient for the simplest encodings where each byte corresponds
|
|
to exactly one Unicode character.
|
|
<p> The caller is responsible for deleting the returned object.
|
|
|
|
<h3 class=fn><a href="tqtextencoder.html">TQTextEncoder</a> * <a name="makeEncoder"></a>TQTextCodec::makeEncoder () const<tt> [virtual]</tt>
|
|
</h3>
|
|
Creates a <a href="tqtextencoder.html">TQTextEncoder</a> which stores enough state to encode chunks
|
|
of Unicode data as char* data. The default implementation creates
|
|
a stateless encoder, which is only sufficient for the simplest
|
|
encodings where each Unicode character corresponds to exactly one
|
|
character.
|
|
<p> The caller is responsible for deleting the returned object.
|
|
|
|
<h3 class=fn>int <a name="mibEnum"></a>TQTextCodec::mibEnum () const<tt> [pure virtual]</tt>
|
|
</h3>
|
|
|
|
<p> Subclasses of TQTextCodec must reimplement this function. It
|
|
returns the MIBenum (see <a href="http://www.iana.org/assignments/character-sets">the
|
|
IANA character-sets encoding file</a> for more information).
|
|
It is important that each TQTextCodec subclass returns the correct
|
|
unique value for this function.
|
|
|
|
<p>Reimplemented in <a href="tqeucjpcodec.html#mibEnum">TQEucJpCodec</a>.
|
|
<h3 class=fn>const char * <a name="mimeName"></a>TQTextCodec::mimeName () const<tt> [virtual]</tt>
|
|
</h3>
|
|
Returns the preferred mime name of the encoding as defined in the
|
|
<a href="http://www.iana.org/assignments/character-sets">IANA character-sets encoding file</a>.
|
|
|
|
<p>Reimplemented in <a href="tqeucjpcodec.html#mimeName">TQEucJpCodec</a>, <a href="tqeuckrcodec.html#mimeName">TQEucKrCodec</a>, <a href="tqjiscodec.html#mimeName">TQJisCodec</a>, <a href="tqhebrewcodec.html#mimeName">TQHebrewCodec</a>, and <a href="tqsjiscodec.html#mimeName">TQSjisCodec</a>.
|
|
<h3 class=fn>const char * <a name="name"></a>TQTextCodec::name () const<tt> [pure virtual]</tt>
|
|
</h3>
|
|
|
|
<p> TQTextCodec subclasses must reimplement this function. It returns
|
|
the name of the encoding supported by the subclass. When choosing
|
|
a name for an encoding, consider these points:
|
|
<ul>
|
|
<li> On X11, <a href="#heuristicNameMatch">heuristicNameMatch</a>( const char * hint )
|
|
is used to test if the TQTextCodec
|
|
can convert between Unicode and the encoding of a font
|
|
with encoding <em>hint</em>, such as "iso8859-1" for Latin-1 fonts,
|
|
"koi8-r" for Russian KOI8 fonts.
|
|
The default algorithm of heuristicNameMatch() uses <a href="#name">name</a>().
|
|
<li> Some applications may use this function to present
|
|
encodings to the end user.
|
|
</ul>
|
|
|
|
<p>Example: <a href="qwerty-example.html#x395">qwerty/qwerty.cpp</a>.
|
|
<h3 class=fn>void <a name="setCodecForCStrings"></a>TQTextCodec::setCodecForCStrings ( <a href="tqtextcodec.html">TQTextCodec</a> * c )<tt> [static]</tt>
|
|
</h3><p><b>Warning:</b> This function is <i>not</i> <a href="threads.html#reentrant">reentrant</a>.</p>
|
|
|
|
|
|
|
|
<p> Sets the codec used by <a href="tqstring.html">TQString</a> to convert to and from const char*
|
|
and TQCStrings. If <em>c</em> is 0 (the default), TQString assumes Latin-1.
|
|
<p> <b>Warning:</b> Some codecs do not preserve the characters in the ascii
|
|
range (0x00 to 0x7f). For example, the Japanese Shift-JIS
|
|
encoding maps the backslash character (0x5c) to the Yen character.
|
|
This leads to unexpected results when using the backslash
|
|
character to escape characters in strings used in e.g. regular
|
|
expressions. Use <a href="tqstring.html#fromLatin1">TQString::fromLatin1</a>() to preserve characters in
|
|
the ascii range when needed.
|
|
<p> <p>See also <a href="#codecForCStrings">codecForCStrings</a>() and <a href="#setCodecForTr">setCodecForTr</a>().
|
|
|
|
<h3 class=fn>void <a name="setCodecForLocale"></a>TQTextCodec::setCodecForLocale ( <a href="tqtextcodec.html">TQTextCodec</a> * c )<tt> [static]</tt>
|
|
</h3>
|
|
Set the codec to <em>c</em>; this will be returned by <a href="#codecForLocale">codecForLocale</a>().
|
|
This might be needed for some applications that want to use their
|
|
own mechanism for setting the locale.
|
|
<p> <p>See also <a href="#codecForLocale">codecForLocale</a>().
|
|
|
|
<h3 class=fn>void <a name="setCodecForTr"></a>TQTextCodec::setCodecForTr ( <a href="tqtextcodec.html">TQTextCodec</a> * c )<tt> [static]</tt>
|
|
</h3><p><b>Warning:</b> This function is <i>not</i> <a href="threads.html#reentrant">reentrant</a>.</p>
|
|
|
|
|
|
|
|
<p> Sets the codec used by <a href="tqobject.html#tr">TQObject::tr</a>() on its argument to <em>c</em>. If
|
|
<em>c</em> is 0 (the default), tr() assumes Latin-1.
|
|
<p> If the literal quoted text in the program is not in the Latin-1
|
|
encoding, this function can be used to set the appropriate
|
|
encoding. For example, software developed by Korean programmers
|
|
might use eucKR for all the text in the program, in which case the
|
|
main() function might look like this:
|
|
<p> <pre>
|
|
int main(int argc, char** argv)
|
|
{
|
|
<a href="ntqapplication.html">TQApplication</a> app(argc, argv);
|
|
... install any additional codecs ...
|
|
TQTextCodec::<a href="#setCodecForTr">setCodecForTr</a>( TQTextCodec::<a href="#codecForName">codecForName</a>("eucKR") );
|
|
...
|
|
}
|
|
</pre>
|
|
|
|
<p> Note that this is not the way to select the encoding that the <em>user</em> has chosen. For example, to convert an application containing
|
|
literal English strings to Korean, all that is needed is for the
|
|
English strings to be passed through tr() and for translation
|
|
files to be loaded. For details of <a href="i18n.html#internationalization">internationalization</a>, see the
|
|
<a href="i18n.html">TQt internationalization documentation</a>.
|
|
<p> <p>See also <a href="#codecForTr">codecForTr</a>() and <a href="#setCodecForCStrings">setCodecForCStrings</a>().
|
|
|
|
<h3 class=fn>int <a name="simpleHeuristicNameMatch"></a>TQTextCodec::simpleHeuristicNameMatch ( const char * name, const char * hint )<tt> [static protected]</tt>
|
|
</h3>
|
|
A simple utility function for <a href="#heuristicNameMatch">heuristicNameMatch</a>(): it does some
|
|
very minor character-skipping so that almost-exact matches score
|
|
high. <em>name</em> is the text we're matching and <em>hint</em> is used for
|
|
the comparison.
|
|
|
|
<h3 class=fn><a href="tqstring.html">TQString</a> <a name="toUnicode"></a>TQTextCodec::toUnicode ( const char * chars, int len ) const<tt> [virtual]</tt>
|
|
</h3>
|
|
TQTextCodec subclasses must reimplement this function or
|
|
<a href="#makeDecoder">makeDecoder</a>(). It converts the first <em>len</em> characters of <em>chars</em>
|
|
to Unicode.
|
|
<p> The default implementation makes a decoder with makeDecoder() and
|
|
converts the input with that. Note that the default makeDecoder()
|
|
implementation makes a decoder that simply calls
|
|
this function, hence subclasses <em>must</em> reimplement one function or
|
|
the other to avoid infinite recursion.
|
|
|
|
<h3 class=fn><a href="tqstring.html">TQString</a> <a name="toUnicode-2"></a>TQTextCodec::toUnicode ( const <a href="qbytearray.html">TQByteArray</a> & a, int len ) const
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>a</em> contains the source characters; <em>len</em> contains the number of
|
|
characters in <em>a</em> to use.
|
|
|
|
<h3 class=fn><a href="tqstring.html">TQString</a> <a name="toUnicode-3"></a>TQTextCodec::toUnicode ( const <a href="qbytearray.html">TQByteArray</a> & a ) const
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>a</em> contains the source characters.
|
|
|
|
<h3 class=fn><a href="tqstring.html">TQString</a> <a name="toUnicode-4"></a>TQTextCodec::toUnicode ( const <a href="ntqcstring.html">TQCString</a> & a, int len ) const
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>a</em> contains the source characters; <em>len</em> contains the number of
|
|
characters in <em>a</em> to use.
|
|
|
|
<h3 class=fn><a href="tqstring.html">TQString</a> <a name="toUnicode-5"></a>TQTextCodec::toUnicode ( const <a href="ntqcstring.html">TQCString</a> & a ) const
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>a</em> contains the source characters.
|
|
|
|
<h3 class=fn><a href="tqstring.html">TQString</a> <a name="toUnicode-6"></a>TQTextCodec::toUnicode ( const char * chars ) const
|
|
</h3>
|
|
This is an overloaded member function, provided for convenience. It behaves essentially like the above function.
|
|
<p> <em>chars</em> contains the source characters.
|
|
|
|
<!-- eof -->
|
|
<hr><p>
|
|
This file is part of the <a href="index.html">TQt toolkit</a>.
|
|
Copyright © 1995-2007
|
|
<a href="http://www.trolltech.com/">Trolltech</a>. All Rights Reserved.<p><address><hr><div align=center>
|
|
<table width=100% cellspacing=0 border=0><tr>
|
|
<td>Copyright © 2007
|
|
<a href="troll.html">Trolltech</a><td align=center><a href="trademarks.html">Trademarks</a>
|
|
<td align=right><div align=right>TQt 3.3.8</div>
|
|
</table></div></address></body>
|
|
</html>
|