You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
199 lines
7.3 KiB
199 lines
7.3 KiB
//C- -------------------------------------------------------------------
|
|
//C- DjVuLibre-3.5
|
|
//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
|
|
//C- Copyright (c) 2001 AT&T
|
|
//C-
|
|
//C- This software is subject to, and may be distributed under, the
|
|
//C- GNU General Public License, Version 2. The license should have
|
|
//C- accompanied the software or you may obtain a copy of the license
|
|
//C- from the Free Software Foundation at http://www.fsf.org .
|
|
//C-
|
|
//C- This program is distributed in the hope that it will be useful,
|
|
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
//C- GNU General Public License for more details.
|
|
//C-
|
|
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
|
|
//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech
|
|
//C- Software authorized us to replace the original DjVu(r) Reference
|
|
//C- Library notice by the following text (see doc/lizard2002.djvu):
|
|
//C-
|
|
//C- ------------------------------------------------------------------
|
|
//C- | DjVu (r) Reference Library (v. 3.5)
|
|
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
|
|
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
|
|
//C- | 6,058,214 and patents pending.
|
|
//C- |
|
|
//C- | This software is subject to, and may be distributed under, the
|
|
//C- | GNU General Public License, Version 2. The license should have
|
|
//C- | accompanied the software or you may obtain a copy of the license
|
|
//C- | from the Free Software Foundation at http://www.fsf.org .
|
|
//C- |
|
|
//C- | The computer code originally released by LizardTech under this
|
|
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
|
|
//C- | ORIGINAL CODE." Subject to any third party intellectual property
|
|
//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
|
|
//C- | non-exclusive license to make, use, sell, or otherwise dispose of
|
|
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
|
|
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
|
|
//C- | General Public License. This grant only confers the right to
|
|
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
|
|
//C- | the extent such infringement is reasonably necessary to enable
|
|
//C- | recipient to make, have made, practice, sell, or otherwise dispose
|
|
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
|
|
//C- | any greater extent that may be necessary to utilize further
|
|
//C- | modifications or combinations.
|
|
//C- |
|
|
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
|
|
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
|
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
|
|
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
|
//C- +------------------------------------------------------------------
|
|
//
|
|
// $Id: UnicodeByteStream.h,v 1.9 2003/11/07 22:08:22 leonb Exp $
|
|
// $Name: release_3_5_15 $
|
|
|
|
#ifndef _UNICODEBYTESTREAM_H_
|
|
#define _UNICODEBYTESTREAM_H_
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
#if NEED_GNUG_PRAGMAS
|
|
# pragma interface
|
|
#endif
|
|
|
|
|
|
/** @name UnicodeByteStream.h
|
|
|
|
Files #"UnicodeByteStream.h"# and #"UnicodeByteStream.cpp"# implement a parser for
files structured according to the W3C Extensible Markup Language (XML) 1.0
(Second Edition).
|
|
|
|
Class \Ref{UnicodeByteStream} provides a way to read or write XML files.
Member functions provide an easy means to position the underlying
\Ref{ByteStream}.
|
|
|
|
{\bf References} --- W3C Extensible Markup Language (XML) 1.0
|
|
(Second Edition)
|
|
\URL{http://www.w3.org/TR/2000/REC-xml-20001006.html}
|
|
|
|
@memo
|
|
XML file parser.
|
|
@author
|
|
Bill C Riemers <docbill@sourceforge.net>
|
|
@version
|
|
#$Id: UnicodeByteStream.h,v 1.9 2003/11/07 22:08:22 leonb Exp $# */
|
|
//@{
|
|
|
|
#include "DjVuGlobal.h"
|
|
#include "GString.h"
|
|
#include "ByteStream.h"
|
|
|
|
|
|
#ifdef HAVE_NAMESPACES
|
|
namespace DJVU {
|
|
# ifdef NOT_DEFINED // Just to fool emacs c++ mode
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
/** ByteStream interface for a Unicode file.
|
|
|
|
Class #UnicodeByteStream# augments the #ByteStream# interface with
functions for navigating Unicode documents. It works in conjunction
with a ByteStream specified at construction time.
|
|
|
|
{\bf Reading a Unicode file} --- You can read a Unicode file by
constructing a #UnicodeByteStream# object attached to the ByteStream
containing the Unicode file.
|
|
|
|
{\bf Writing a Unicode file} --- You can write a Unicode file by
constructing a #UnicodeByteStream# object attached to the seekable
ByteStream object that will contain the XML file.
|
|
|
|
Writing an XML file requires a seekable ByteStream (see
\Ref{ByteStream::is_seekable}). This is not much of a problem because you
can always create the XML file in a \Ref{MemoryByteStream} and then use
\Ref{ByteStream::copy} to transfer the XML file into a non-seekable
ByteStream. */
|
|
|
|
class UnicodeByteStream : public ByteStream
|
|
{
|
|
protected:
|
|
UnicodeByteStream(const UnicodeByteStream &bs);
|
|
UnicodeByteStream(GP<ByteStream> bs,
|
|
const GStringRep::EncodeType encodetype=GStringRep::XUTF8);
|
|
public:
|
|
/** Constructs an UnicodeByteStream object attached to ByteStream #bs#.
|
|
Any ByteStream can be used when reading an XML file. Writing
|
|
an XML file however requires a seekable ByteStream. */
|
|
static GP<UnicodeByteStream> create(GP<ByteStream> bs,
|
|
const GStringRep::EncodeType encodetype=GStringRep::XUTF8)
|
|
{ return new UnicodeByteStream(bs,encodetype); }
|
|
|
|
// --- BYTESTREAM INTERFACE
|
|
~UnicodeByteStream();
|
|
/// Sets the encoding type and seek's to position 0.
|
|
void set_encodetype(const GStringRep::EncodeType et=GStringRep::XUTF8);
|
|
void set_encoding(const GUTF8String &encoding);
|
|
/// Simmular to fgets(), except read aheads effect the tell() position.
|
|
virtual GUTF8String gets(size_t const t=0,unsigned long const stopat='\n',bool const inclusive=true);
|
|
/// Resets the gets buffering as well as physically seeking.
|
|
virtual int seek(long offset, int whence = SEEK_SET, bool nothrow=false);
|
|
/** Physically reads the specified bytes, and truncate the read ahead buffer.
|
|
*/
|
|
virtual size_t read(void *buffer, size_t size);
|
|
/// Not correctly implimented...
|
|
virtual size_t write(const void *buffer, size_t size);
|
|
/// tell will tell you the read position, including read ahead for gets()...
|
|
virtual long tell(void) const;
|
|
/// Does a flush, and clears the read ahead buffer.
|
|
virtual void flush(void);
|
|
|
|
/// Find out how many lines have been read with gets.
|
|
int get_lines_read(void) const { return linesread; }
|
|
protected:
|
|
/// The real byte stream.
|
|
GP<ByteStream> bs;
|
|
GUTF8String buffer;
|
|
int bufferpos;
|
|
int linesread;
|
|
long startpos;
|
|
private:
|
|
// Cancel C++ default stuff
|
|
UnicodeByteStream & operator=(UnicodeByteStream &);
|
|
};
|
|
|
|
|
|
class XMLByteStream : public UnicodeByteStream
|
|
{
|
|
protected:
|
|
XMLByteStream(GP<ByteStream> &bs);
|
|
XMLByteStream(UnicodeByteStream &bs);
|
|
void init(void);
|
|
public:
|
|
static GP<XMLByteStream> create(GP<ByteStream> bs);
|
|
static GP<XMLByteStream> create(UnicodeByteStream &bs);
|
|
// --- BYTESTREAM INTERFACE
|
|
~XMLByteStream();
|
|
};
|
|
|
|
/// Inline factory: allocates an #XMLByteStream# from the existing
/// #UnicodeByteStream# #bs# and hands it back wrapped in a smart pointer.
/// Nothing besides the constructor is invoked here.
inline GP<XMLByteStream>
XMLByteStream::create(UnicodeByteStream &bs)
{
  GP<XMLByteStream> retval = new XMLByteStream(bs);
  return retval;
}
|
|
|
|
//@}
|
|
|
|
|
|
#ifdef HAVE_NAMESPACES
|
|
}
|
|
# ifndef NOT_USING_DJVU_NAMESPACE
|
|
using namespace DJVU;
|
|
# endif
|
|
#endif
|
|
#endif
|
|
|