You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
289 lines
10 KiB
289 lines
10 KiB
/***************************************************************************
|
|
* Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com *
|
|
* Portions Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net> *
|
|
* Please do not use email address above for bug reports; see *
|
|
* the README file *
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
* This program is distributed in the hope that it will be useful, *
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
* GNU General Public License for more details. *
|
|
* *
|
|
* You should have received a copy of the GNU General Public License *
|
|
* along with this program; if not, write to the *
|
|
* Free Software Foundation, Inc., *
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
|
|
***************************************************************************/
|
|
#if USE_BUILTIN_CHMLIB
|
|
#include "chm_lib.h"
|
|
#else
|
|
#include <chm_lib.h>
|
|
#endif
|
|
#include "libchmfile.h"
|
|
#include "libchmtocimage.h"
|
|
|
|
#include <sys/types.h> /* for u_int{32,64}_t */
|
|
|
|
//! Keeps the intermediate search result
|
|
class LCHMSearchProgressResult
|
|
{
|
|
public:
|
|
inline LCHMSearchProgressResult() {}
|
|
inline LCHMSearchProgressResult( u_int32_t t, u_int32_t u ) : titleoff(t),urloff(u) {}
|
|
|
|
TQValueVector<u_int64_t> offsets;
|
|
u_int32_t titleoff;
|
|
u_int32_t urloff;
|
|
};
|
|
|
|
//! An array to keeps the intermediate search results
|
|
typedef QT34VECTOR<LCHMSearchProgressResult> LCHMSearchProgressResults;
|
|
|
|
|
|
//! CHM files processor; the implementation
|
|
class LCHMFileImpl
|
|
{
|
|
public:
|
|
LCHMFileImpl();
|
|
~LCHMFileImpl();
|
|
|
|
// Implementations for LCHMFile members
|
|
bool loadFile( const TQString& archiveName );
|
|
void closeAll();
|
|
|
|
TQString title() const { return encodeWithCurrentCodec( m_title ); }
|
|
TQString homeUrl() const { return encodeWithCurrentCodec( m_home ); }
|
|
|
|
bool getFileContentAsString( TQString * str, const TQString& url, bool internal_encoding = false );
|
|
bool getFileContentAsBinary( TQByteArray * data, const TQString& url ) const;
|
|
bool getFileSize( unsigned int * size, const TQString& url );
|
|
|
|
bool enumerateFiles( TQStringList * files );
|
|
TQString getTopicByUrl ( const TQString& url ) const;
|
|
|
|
const TQPixmap * getBookIconPixmap( unsigned int imagenum );
|
|
|
|
bool setCurrentEncoding( const LCHMTextEncoding * encoding );
|
|
|
|
//! Parse the HHC or HHS file, and fill the context (asIndex is false) or index (asIndex is true) array.
|
|
bool parseFileAndFillArray (const TQString& file, QT34VECTOR< LCHMParsedEntry > * data, bool asIndex );
|
|
|
|
/*!
|
|
* \brief Fast search using the $FIftiMain file in the .chm.
|
|
* \param text The text we're looking for.
|
|
* \param wholeWords Are we looking for whole words only?
|
|
* \param titlesOnly Are we looking for titles only?
|
|
* \param results A string-string hashmap that will hold
|
|
* the results in case of successful search. The keys are
|
|
* the URLs and the values are the page titles.
|
|
* \param phrase_search Indicates that word offset information should be kept.
|
|
* \return true if the search found something, false otherwise.
|
|
*/
|
|
bool searchWord( const TQString& word,
|
|
bool wholeWords,
|
|
bool titlesOnly,
|
|
LCHMSearchProgressResults& results,
|
|
bool phrase_search );
|
|
|
|
/*!
|
|
* \brief Finalize the search, resolve the matches, the and generate the results array.
|
|
* \param tempres Temporary search results from SearchWord.
|
|
* \param results A string-string hashmap that will hold the results in case of successful search.
|
|
* The keys are the URLs and the values are the page titles.
|
|
*/
|
|
void getSearchResults( const LCHMSearchProgressResults& tempres,
|
|
TQStringList * results,
|
|
unsigned int limit_results = 500 );
|
|
|
|
//! Looks up fileName in the archive.
|
|
bool ResolveObject( const TQString& fileName, chmUnitInfo *ui ) const;
|
|
|
|
//! Retrieves an uncompressed chunk of a file in the .chm.
|
|
size_t RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer, LONGUINT64 fileOffset, LONGINT64 bufferSize) const;
|
|
|
|
//! Encode the string with the currently selected text codec, if possible. Or return as-is, if not.
|
|
inline TQString encodeWithCurrentCodec (const TQString& str) const
|
|
{
|
|
return (m_textCodec ? m_textCodec->toUnicode (str.utf8()) : str);
|
|
}
|
|
|
|
//! Encode the string with the currently selected text codec, if possible. Or return as-is, if not.
|
|
inline TQString encodeWithCurrentCodec (const char * str) const
|
|
{
|
|
return (m_textCodec ? m_textCodec->toUnicode (str) : (TQString) str);
|
|
}
|
|
|
|
//! Encode the string from internal files with the currently selected text codec, if possible.
|
|
//! Or return as-is, if not.
|
|
inline TQString encodeInternalWithCurrentCodec (const TQString& str) const
|
|
{
|
|
return (m_textCodecForSpecialFiles ? m_textCodecForSpecialFiles->toUnicode (str.utf8()) : str);
|
|
}
|
|
|
|
//! Encode the string from internal files with the currently selected text codec, if possible.
|
|
//! Or return as-is, if not.
|
|
inline TQString encodeInternalWithCurrentCodec (const char * str) const
|
|
{
|
|
return (m_textCodecForSpecialFiles ? m_textCodecForSpecialFiles->toUnicode (str) : (TQString) str);
|
|
}
|
|
|
|
//! Helper. Translates from Win32 encodings to generic wxWidgets ones.
|
|
const char * GetFontEncFromCharSet (const TQString& font) const;
|
|
|
|
//! Helper. Returns the $FIftiMain offset of leaf node or 0.
|
|
u_int32_t GetLeafNodeOffset(const TQString& text,
|
|
u_int32_t initalOffset,
|
|
u_int32_t buffSize,
|
|
u_int16_t treeDepth );
|
|
|
|
//! Helper. Processes the word location code entries while searching.
|
|
bool ProcessWLC(u_int64_t wlc_count,
|
|
u_int64_t wlc_size,
|
|
u_int32_t wlc_offset,
|
|
unsigned char ds,
|
|
unsigned char dr,
|
|
unsigned char cs,
|
|
unsigned char cr,
|
|
unsigned char ls,
|
|
unsigned char lr,
|
|
LCHMSearchProgressResults& results,
|
|
bool phrase_search );
|
|
|
|
//! Looks up as much information as possible from #WINDOWS/#STRINGS.
|
|
bool getInfoFromWindows();
|
|
|
|
//! Looks up as much information as possible from #SYSTEM.
|
|
bool getInfoFromSystem();
|
|
|
|
//! Fill the topic-url map
|
|
void fillTopicsUrlMap();
|
|
|
|
//! Sets up textCodec
|
|
void setupTextCodec (const char * name);
|
|
|
|
//! Guess used text encoding, using m_detectedLCID and m_font. Set up m_textCodec
|
|
bool guessTextEncoding ();
|
|
|
|
//! Change the current CHM encoding for internal files and texts.
|
|
//! Encoding could be either simple TQt codepage, or set like CP1251/KOI8, which allows to
|
|
//! set up encodings separately for text (first) and internal files (second)
|
|
bool changeFileEncoding( const char *qtencoding );
|
|
|
|
//! Convert the word, so it has an appropriate encoding
|
|
TQCString convertSearchWord ( const TQString &src );
|
|
|
|
/*!
|
|
* Helper procedure in TOC parsing, decodes the string between the quotes (first or last) with decoding HTML
|
|
* entities like í
|
|
*/
|
|
int findStringInQuotes (const TQString& tag, int offset, TQString& value, bool firstquote, bool decodeentities );
|
|
|
|
/*!
|
|
* Decodes Unicode HTML entities according to current encoding.
|
|
*/
|
|
TQString decodeEntity (const TQString& entity );
|
|
|
|
/*!
|
|
* \brief Returns the list of all available text encodings.
|
|
* \return A pointer to the beginning of the text encoding table. The table could be
|
|
* enumerated until language == 0, which means end of table.
|
|
*
|
|
* \ingroup encoding
|
|
*/
|
|
static const LCHMTextEncoding * getTextEncodingTable();
|
|
|
|
/*!
|
|
* \brief Looks up for encoding by LCID
|
|
* \param lcid LCID to look up
|
|
* \return A pointer to encoding structure.
|
|
*
|
|
* \ingroup encoding
|
|
*/
|
|
static const LCHMTextEncoding * lookupByLCID( short lcid );
|
|
|
|
/*!
|
|
* \brief Get the encoding index
|
|
* \param enc Encoding
|
|
* \return An index in encoding table. getTextEncodingTable() + i gets the encoding.
|
|
*
|
|
* \ingroup encoding
|
|
*/
|
|
static int getEncodingIndex( const LCHMTextEncoding * enc);
|
|
|
|
/*!
|
|
* Normalizes path to search in internal arrays
|
|
*/
|
|
TQString normalizeUrl (const TQString& path ) const;
|
|
|
|
|
|
// Members
|
|
|
|
//! Pointer to the chmlib structure
|
|
chmFile * m_chmFile;
|
|
|
|
//! Opened file name
|
|
TQString m_filename;
|
|
|
|
//! Home url, got from CHM file
|
|
TQString m_home;
|
|
|
|
//! Context tree filename. Got from CHM file
|
|
TQString m_topicsFile;
|
|
|
|
//! Index filename. Got from CHM file
|
|
TQString m_indexFile;
|
|
|
|
//! Chm Title. Got from CHM file
|
|
TQString m_title;
|
|
|
|
// Localization stuff
|
|
//! LCID from CHM file, used in encoding detection
|
|
short m_detectedLCID;
|
|
|
|
//! font charset from CHM file, used in encoding detection
|
|
TQString m_font;
|
|
|
|
//! Chosen text codec
|
|
TQTextCodec * m_textCodec;
|
|
TQTextCodec * m_textCodecForSpecialFiles;
|
|
|
|
//! Current encoding
|
|
const LCHMTextEncoding * m_currentEncoding;
|
|
|
|
//! Map to decode HTML entitles like ´ based on current encoding
|
|
TQMap<TQString, TQString> m_entityDecodeMap;
|
|
|
|
//! TRUE if /#TOPICS, /#STRINGS, /#URLTBL and /#URLSTR are resolved, and the members below are valid
|
|
bool m_lookupTablesValid;
|
|
|
|
//! pointer to /#TOPICS
|
|
chmUnitInfo m_chmTOPICS;
|
|
|
|
//! pointer to /#STRINGS
|
|
chmUnitInfo m_chmSTRINGS;
|
|
|
|
//! pointer to /#URLTBL
|
|
chmUnitInfo m_chmURLTBL;
|
|
|
|
//! pointer to /#URLSTR
|
|
chmUnitInfo m_chmURLSTR;
|
|
|
|
//! Indicates whether the built-in search is available. This is true only when m_lookupTablesValid
|
|
//! is TRUE, and m_chmFIftiMain is resolved.
|
|
bool m_searchAvailable;
|
|
|
|
//! pointer to /$FIftiMain
|
|
chmUnitInfo m_chmFIftiMain;
|
|
|
|
//! Book TOC icon images storage
|
|
LCHMTocImageKeeper m_imagesKeeper;
|
|
|
|
//! Map url->topic
|
|
TQMap< TQString, TQString > m_url2topics;
|
|
};
|