kpilot/conduits/docconduit/makedoc9.cc

// based on: MakeDoc, version 2
// I only took the tBuf class from there and adapted it.
//
// Compresses text files into a format that is ready to export to a Pilot
// and work with Rick Bram's PilotDOC reader.
// Copyright (C) Reinhold Kainhofer, 2002
// Copyright (C) Pat Beirne, 2000
//
// Original file (makedoc9.cpp) copyright by:
// Copyright (C) Pat Beirne, 2000.
// Distributable under the GNU General Public License Version 2 or later.
//
// ver 0.6 enforce 31 char limit on database names
// ver 0.7 change header and record0 to structs
// ver 2.0 added category control on the command line
//              changed extensions from .prc to .pdb

/*
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program in a file called COPYING; if not, write to
** the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
** MA 02110-1301, USA.
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <iostream>


#include "makedoc9.h"


//
// Issue()
//
// action:  append one source character to the compressed stream held in
//          buf, writing at offset len and advancing len.
//
// params:  src    - the character to write
//          bSpace - in/out flag. Non-zero on entry means a space from an
//                   earlier call is still pending. It is set to 1 when src
//                   is a space that is being held back, and reset to 0 once
//                   the pending space has been written.
//
// returns: the new value of len
//
// note:    nothing here checks the capacity of buf; one call can write up
//          to three bytes (space, escape marker, character).
//
unsigned tBuf::Issue(byte src, int &bSpace)
{
	byte *const out = buf;
	unsigned int pos = len;

	// TODO: which of the if parts should really be included???
#if 0
	// modified version of issue
	// just issue the char
	if (src >= 0x80 || src <= 8)
		out[pos++] = 1;
	out[pos++] = src;

#else
	const bool spacePending = (bSpace != 0);
	// 0 and 9...0x7F go out as themselves; 1...8 and 0x80...0xFF need the
	// one-byte escape marker in front of them
	const bool plain = (src == 0) || (src > 8 && src < 0x80);

	if (spacePending && src >= 0x40 && src <= 0x7F)
	{
		// space + ASCII char squeezed into a single byte 0xC0...0xFF
		out[pos++] = src | 0x80;
		bSpace = 0;
	}
	else if (!spacePending && src == ' ')
	{
		// hold the space back; it may combine with the next char
		bSpace = 1;
	}
	else
	{
		// flush a pending space that could not be combined
		if (spacePending)
		{
			out[pos++] = ' ';
			bSpace = 0;
		}
		if (!plain)
			out[pos++] = 1;
		out[pos++] = src;
	}
#endif
	len = pos;
	return pos;
}

//
// Compress
//
// params:      none
//
// action:      takes the given buffer,
//                                      and compresses
//                                      the original data down into a second buffer
//
// returns:     the compressed length (the new len). Returns 0 when there is
//              no buffer, and the current len when the buffer is already
//              flagged as compressed.
//
// comment:     This version makes heavy use of walking pointers.
//
//              The output buffer is sized for the worst case (every input
//              byte needing an escape marker, i.e. 2 * len), and matches are
//              located with a bounded search, so nothing is read or written
//              outside the input buffer and embedded 0 bytes are handled.
//
unsigned tBuf::Compress()
{
	if (!buf)
		return 0;
	if (isCompressed) {
//		cout<<"Buffer is already compressed!"<<endl;
		return len;
//	} else {
//		cout<<" Compressing buffer!!!"<<endl;
	}

	unsigned int i;

	// run through the input buffer
	byte *pBuffer;					  // points to the input buffer
	byte *pHit;						  // points to a walking test hit; works upwards on successive matches
	byte *pPrevHit;				  // previous value of pHit; also, start of next test
	byte *pTestHead;				  // current test string
	byte *pTestTail;				  // current walking pointer; one past the current test buffer
	byte *pEnd;						  // 1 past the end of the input buffer

	pHit = pPrevHit = pTestHead = pBuffer = buf;
	pEnd = buf + len;
	// for an empty input the loop below never runs; don't step past pEnd
	pTestTail = (len > 0) ? pTestHead + 1 : pEnd;

	// make a dest buffer and reassign the local buffer.
	// The first pass emits at most two bytes per input byte (escape marker +
	// char, or a two-byte run code for three or more input bytes); the
	// merge pass below only ever shrinks the data in place.
	unsigned int capacity = 2 * len;
	if (capacity < 6000)
		capacity = 6000;
	buf = new byte[capacity];
	len = 0;							  // used to walk through the output buffer

	// loop, absorbing one more char from the input buffer on each pass
	for (; pTestHead != pEnd; pTestTail++)
	{
		// if we already have 10 char match, don't bother scanning again for the 11th (wasted time)
		if (pTestTail - pTestHead != (1 << COUNT_BITS) + 3)
		{
			// scan in the previous data for the first occurrence of the
			// test string [pTestHead, pTestTail). The haystack ends at
			// pTestTail, so the test string itself is always found in the
			// worst case and pHit == pTestHead then means "no earlier match".
			pHit = std::search(pPrevHit, pTestTail, pTestHead, pTestTail);
		}

		// on a mismatch or end of buffer, issued codes
		if (pHit == pTestHead
			|| pTestTail - pTestHead > (1 << COUNT_BITS) + 2
			|| pTestTail == pEnd)
		{
			// issue the codes
			// first, check for short runs
			if (pTestTail - pTestHead < 4)
			{
				// 0...8 and 0x80...0xFF are escaped with a count byte of 1
				if (pTestHead[0] > 0x7F || pTestHead[0] <= 8)
					buf[len++] = 1;
				buf[len++] = pTestHead[0];
				pTestHead++;
			}
			// for longer runs, issue a run-code
			else
			{
				// the last char of the test string is the one that did not
				// match (or was not tested), so the run is one shorter than
				// the test string; the length is stored biased by 3
				unsigned int dist = pTestHead - pPrevHit;
				unsigned int runBits =
					static_cast<unsigned int>(pTestTail - pTestHead - 4);
				unsigned int compound = (dist << COUNT_BITS) + runBits;

//if (dist>=(1<<DISP_BITS)) printf("\n!! error dist overflow");
//if (pTestTail-pTestHead-4>7) printf("\n!! error len overflow");

				buf[len++] = 0x80 + (compound >> 8);
				buf[len++] = compound & 0xFF;
				// and start again
				pTestHead = pTestTail - 1;
			}
			// start the search again
			pPrevHit = pBuffer;
			// within range
			if (pTestHead - pPrevHit > ((1 << DISP_BITS) - 1))
				pPrevHit = pTestHead - ((1 << DISP_BITS) - 1);
		}
		// got a match
		else
		{
			pPrevHit = pHit;
		}
		// when we get to the end of the buffer, don't inc past the end
		// this forces the residue chars out one at a time
		if (pTestTail == pEnd)
			pTestTail--;
	}


	// final scan to merge consecutive high chars together
	// and merge space chars
	// (i reads, k writes; k never overtakes i, so this works in place)
	unsigned int k;

	for (i = k = 0; i < len; i++, k++)
	{
		buf[k] = buf[i];
		// skip the run-length codes
		if (buf[k] >= 0x80 && buf[k] < 0xC0)
			buf[++k] = buf[++i];
		// if we hit a high char marker, look ahead for another
		// and merge multiples together
		else if (buf[k] == 1)
		{
			buf[k + 1] = buf[i + 1];
			// buf[k] is the running count of escaped chars (at most 8)
			while (i + 2 < len && buf[i + 2] == 1 && buf[k] < 8)
			{
				buf[k]++;
				buf[k + buf[k]] = buf[i + 3];
				i += 2;
			}
			k += buf[k];
			i++;
		}
		// a space followed by 0x40...0x7F collapses into one byte 0xC0...0xFF
		else if (buf[k] == ' ' && i < len - 1 && buf[i + 1] <= 0x7F
			&& buf[i + 1] >= 0x40)
			buf[k] = 0x80 | buf[++i];
	}

	// delete original buffer
	delete[]pBuffer;
	len = k;

	isCompressed = true;
	return k;
}

/*
	Decompress

	params:	none

	action: make a new buffer
					run through the source data
					check the 4 cases:
						0,9...7F represent self
						1...8		escape n chars
						80...bf reference earlier run
						c0...ff	space+ASCII

	returns: the new len, which counts the two 0 bytes appended after the
					decoded data. Returns 0 when there is no buffer and the
					current len when the buffer is not flagged as compressed.

	note:	the output buffer is sized from the input length, and damaged
					input (an escape or run code cut off by the end of the
					data, or a run that refers to data before the start of
					the output) is never followed outside the buffers;
					decoding stops at the first damaged run code.
*/
unsigned tBuf::Decompress()
{
	if (!buf)
		return 0;
	if (!isCompressed) {
//		cout<<"Buffer already uncompressed. Doing nothing"<<endl;
		return len;
//	} else {
//		cout<<"Decompressing buffer"<<endl;
	}

	// longest run a single two-byte run code can expand to
	const unsigned int maxRun = (1 << COUNT_BITS) + 2;

	// No input byte can produce more than maxRun output bytes, so this is
	// always enough room, including the two trailing 0 bytes.
	unsigned int capacity = len * maxRun + 2;
	if (capacity < 6000)
		capacity = 6000;

	byte *pOut = new byte[capacity];
	byte *in_buf = buf;
	byte *out_buf = pOut;

	unsigned int i, j;

	for (j = i = 0; j < len;)
	{
		unsigned int c;

		// take a char from the input buffer
		c = in_buf[j++];

		// separate the char into zones: 0, 1...8, 9...0x7F, 0x80...0xBF, 0xC0...0xFF

		// codes 1...8 mean copy that many bytes; for accented chars & binary
		// (copy fewer if the input ends early)
		if (c > 0 && c < 9)
		{
			while (c-- && j < len)
				out_buf[i++] = in_buf[j++];
		}

		// codes 0, 9...0x7F represent themselves
		else if (c < 0x80)
			out_buf[i++] = c;

		// codes 0xC0...0xFF represent "space + ascii char"
		else if (c >= 0xC0)
		{
			out_buf[i++] = ' ';
			out_buf[i++] = c ^ 0x80;
		}

		// codes 0x80...0xBf represent sequences
		else
		{
			// the second byte of the run code is missing
			if (j >= len)
				break;

			unsigned int m, n;

			c <<= 8;
			c += in_buf[j++];
			m = (c & 0x3FFF) >> COUNT_BITS;		// distance back into the output
			n = c & ((1 << COUNT_BITS) - 1);	// run length, stored biased by 3
			n += 3;

			// a run must start inside the data decoded so far
			if (m == 0 || m > i)
				break;

			// byte by byte on purpose: source and destination may overlap
			while (n--)
			{
				out_buf[i] = out_buf[i - m];
				i++;
			}
		}
	}
	out_buf[i++]='\0';
	out_buf[i++]='\0';
	delete[]buf;
	buf = pOut;
	len = i;

	isCompressed = false;
	return i;
}

unsigned tBuf::DuplicateCR()
{
	if (!buf)
		return 0;
	byte *pBuf = new byte[2 * len];

	unsigned int k, j;

	for (j = k = 0; j < len; j++, k++)
	{
		pBuf[k] = buf[j];
		if (pBuf[k] == 0x0A)
			pBuf[k++] = 0x0D, pBuf[k] = 0x0A;
	}
	delete[]buf;
	buf = pBuf;
	len = k;
	return k;
}


// this nasty little beast removes really low ASCII and 0's
// and handles the CR problem
//
// if a cr appears before a lf, then remove the cr
// if a cr appears in isolation, change to a lf
unsigned tBuf::RemoveBinary()
{
	if (!buf)
		return 0;
	byte *in_buf = buf;
	byte *out_buf = new byte[len];

	unsigned int k, j;

	for (j = k = 0; j < len; j++, k++)
	{
		// copy each byte
		out_buf[k] = in_buf[j];

		// throw away really low ASCII
		if (( /*out_buf[k]>=0 && */ out_buf[k] < 9))
			k--;

		// for CR
		if (out_buf[k] == 0x0D)
		{
			// if next is LF, then drop it
			if (j < len - 1 && in_buf[j + 1] == 0x0A)
				k--;
			else						  // turn it into a LF
				out_buf[k] = 0x0A;
		}
	}
	delete[]buf;
	buf = out_buf;
	len = k;
	return k;
}

void tBuf::setText(const byte * text, unsigned txtlen, bool txtcomp)
{
	if (buf)
		delete[]buf;
	buf = 0L;

	if (txtlen <= 0)
		txtlen = strlen((const char *) text);
	len = txtlen;
	buf = new byte[len];

	memcpy(buf, text, len*sizeof(char));
//	strncpy((char *) buf, (const char *) text, len);
	isCompressed = txtcomp;
//	cout<<"Setting text, compressed="<<txtcomp<<endl;
}
Moved kpilot from kdepim to applications, as the core Trinity libraries should not contain hardware-dependent software git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/kpilot@1221127 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`// based on: MakeDoc, version 2`
			`// I only took the tBuf class from there and adapted it.`
			`//`
			`// Compresses text files into a format that is ready to export to a Pilot`
			`// and work with Rick Bram's PilotDOC reader.`
			`// Copyright (C) Reinhold Kainhofer, 2002`
			`// Copyrigth (C) Pat Beirne, 2000`
			`//`
			`// Original file (makedoc9.cpp) copyright by:`
			`// Copyright (C) Pat Beirne, 2000.`
			`// Distributable under the GNU General Public License Version 2 or later.`
			`//`
			`// ver 0.6 enforce 31 char limit on database names`
			`// ver 0.7 change header and record0 to structs`
			`// ver 2.0 added category control on the command line`
			`// changed extensions from .prc to .pdb`

			`/*`
			`** This program is free software; you can redistribute it and/or modify`
			`** it under the terms of the GNU General Public License as published by`
			`** the Free Software Foundation; either version 2 of the License, or`
			`** (at your option) any later version.`
			`**`
			`** This program is distributed in the hope that it will be useful,`
			`** but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`** GNU General Public License for more details.`
			`**`
			`** You should have received a copy of the GNU General Public License`
			`** along with this program in a file called COPYING; if not, write to`
			`** the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,`
			`** MA 02110-1301, USA.`
			`*/`


			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`

			`#include <iostream>`


			`#include "makedoc9.h"`



			`//`
			`// Issue()`
			`//`
			`// action: handle the details of writing a single`
			`// character to the compressed stream`
			`//`
			`unsigned`
			`tBuf::Issue(byte src, int &bSpace)`
			`{`
			`unsigned int iDest = len;`
			`byte *dest = buf;`

			`// TODO: which of the if parts should really be included???`
			`#if 0`
			`// modified version of issue`
			`// just issue the char`
			`if (src >= 0x80 \|\| src <= 8)`
			`dest[iDest++] = 1;`
			`dest[iDest++] = src;`

			`#else`
			`// if there is an outstanding space char, see if`
			`// we can squeeze it in with an ASCII char`
			`if (bSpace)`
			`{`
			`if (src >= 0x40 && src <= 0x7F)`
			`dest[iDest++] = src ^ 0x80;`
			`else`
			`{`
			`// couldn't squeeze it in, so issue the space char by itself`
			`// most chars go out simple, except the range 1...8,0x80...0xFF`
			`dest[iDest++] = ' ';`
			`if (src < 0x80 && (src == 0 \|\| src > 8))`
			`dest[iDest++] = src;`
			`else`
			`dest[iDest++] = 1, dest[iDest++] = src;`
			`}`
			`// knock down the space flag`
			`bSpace = 0;`
			`}`
			`else`
			`{`
			`// check for a space char`
			`if (src == ' ')`
			`bSpace = 1;`
			`else`
			`{`
			`if (src < 0x80 && (src == 0 \|\| src > 8))`
			`dest[iDest++] = src;`
			`else`
			`dest[iDest++] = 1, dest[iDest++] = src;`

			`}`
			`}`
			`#endif`
			`len = iDest;`
			`return iDest;`
			`}`

			`//`
			`// Compress`
			`//`
			`// params: none`
			`//`
			`// action: takes the given buffer,`
			`// and compresses`
			`// the original data down into a second buffer`
			`//`
			`// comment: This version make heavy use of walking pointers.`
			`//`
			`unsigned tBuf::Compress()`
			`{`
			`if (!buf)`
			`return 0;`
			`if (isCompressed) {`
			`// cout<<"Buffer is already compressed!"<<endl;`
			`return len;`
			`// } else {`
			`// cout<<" Compressing buffer!!!"<<endl;`
			`}`

			`unsigned int i;`

			`// run through the input buffer`
			`byte *pBuffer; // points to the input buffer`
			`byte *pHit; // points to a walking test hit; works upwards on successive matches`
			`byte *pPrevHit; // previous value of pHit; also, start of next test`
			`byte *pTestHead; // current test string`
			`byte *pTestTail; // current walking pointer; one past the current test buffer`
			`byte *pEnd; // 1 past the end of the input buffer`

			`pHit = pPrevHit = pTestHead = pBuffer = buf;`
			`pTestTail = pTestHead + 1;`
			`pEnd = buf + len; // should point to a 0!`

			`// make a dest buffer and reassign the local buffer`
			`buf = new byte[6000];`
			`len = 0; // used to walk through the output buffer`

			`// loop, absorbing one more char from the input buffer on each pass`
			`for (; pTestHead != pEnd; pTestTail++)`
			`{`
			`// if we already have 10 char match, don't bother scanning again for the 11th (wasted time)`
			`if (pTestTail - pTestHead != (1 << COUNT_BITS) + 3)`
			`{`
			`// scan in the previous data for a match`
			`// terminate the test string (and the matcher string, as well!) in a 0`
			`byte tmp = *pTestTail;`

			`*pTestTail = 0;`
			`pHit = (byte ) strstr((const char ) pPrevHit,`
			`(const char *) pTestHead);`
			`*pTestTail = tmp; // restore the char`
			`}`

			`// on a mismatch or end of buffer, issued codes`
			`if (pHit == pTestHead`
			`\|\| pTestTail - pTestHead > (1 << COUNT_BITS) + 2`
			`\|\| pTestTail == pEnd)`
			`{`
			`// issue the codes`
			`// first, check for short runs`
			`if (pTestTail - pTestHead < 4)`
			`{`
			`if (pTestHead[0] > 0x7F \|\| pTestHead[0] <= 8)`
			`buf[len++] = 1;`
			`buf[len++] = pTestHead[0];`
			`pTestHead++;`
			`}`
			`// for longer runs, issue a run-code`
			`else`
			`{`
			`unsigned int dist = pTestHead - pPrevHit;`
			`unsigned int compound =`
			`(dist << COUNT_BITS) + pTestTail - pTestHead - 4;`

			`//if (dist>=(1<<DISP_BITS)) printf("\n!! error dist overflow");`
			`//if (pTestTail-pTestHead-4>7) printf("\n!! error len overflow");`

			`buf[len++] = 0x80 + (compound >> 8);`
			`buf[len++] = compound & 0xFF;`
			`//printf("\nissuing code for sequence len %d <%c%c%c>",pTestTail-pTestHead-1,pTestHead[0],pTestHead[1],pTestHead[2]);`
			`//printf("\n <%x%x>",pOut[-2],pOut[-1]);`
			`// and start again`
			`pTestHead = pTestTail - 1;`
			`}`
			`// start the search again`
			`pPrevHit = pBuffer;`
			`// within range`
			`if (pTestHead - pPrevHit > ((1 << DISP_BITS) - 1))`
			`pPrevHit = pTestHead - ((1 << DISP_BITS) - 1);`
			`}`
			`// got a match`
			`else`
			`{`
			`pPrevHit = pHit;`
			`}`
			`// when we get to the end of the buffer, don't inc past the end`
			`// this forces the residue chars out one at a time`
			`if (pTestTail == pEnd)`
			`pTestTail--;`
			`}`


			`// final scan to merge consecutive high chars together`
			`// and merge space chars`
			`unsigned int k;`

			`for (i = k = 0; i < len; i++, k++)`
			`{`
			`buf[k] = buf[i];`
			`// skip the run-length codes`
			`if (buf[k] >= 0x80 && buf[k] < 0xC0)`
			`buf[++k] = buf[++i];`
			`// if we hit a high char marker, look ahead for another`
			`// and merge multiples together`
			`else if (buf[k] == 1)`
			`{`
			`buf[k + 1] = buf[i + 1];`
			`while (i + 2 < len && buf[i + 2] == 1 && buf[k] < 8)`
			`{`
			`buf[k]++;`
			`buf[k + buf[k]] = buf[i + 3];`
			`i += 2;`
			`}`
			`k += buf[k];`
			`i++;`
			`}`
			`else if (buf[k] == ' ' && i < len - 1 && buf[i + 1] <= 0x7F`
			`&& buf[i + 1] >= 0x40)`
			`buf[k] = 0x80 \| buf[++i];`
			`}`

			`// delete original buffer`
			`delete[]pBuffer;`
			`len = k;`

			`isCompressed = true;`
			`return k;`
			`}`

			`/*`
			`Decompress`

			`params: none`

			`action: make a new buffer`
			`run through the source data`
			`check the 4 cases:`
			`0,9...7F represent self`
			`1...8 escape n chars`
			`80...bf reference earlier run`
			`c0...ff space+ASCII`

			`*/`
			`unsigned tBuf::Decompress()`
			`{`
			`if (!buf)`
			`return 0;`
			`if (!isCompressed) {`
			`// cout<<"Buffer already uncompressed. Doing nothing"<<endl;`
			`return len;`
			`// } else {`
			`// cout<<"Decompressing buffer"<<endl;`
			`}`

			`// we "know" that all decompresses fit within 4096, right?`
			`byte *pOut = new byte[6000];`
			`byte *in_buf = buf;`
			`byte *out_buf = pOut;`

			`unsigned int i, j;`

			`for (j = i = 0; j < len;)`
			`{`
			`unsigned int c;`

			`// take a char from the input buffer`
			`c = in_buf[j++];`

			`// separate the char into zones: 0, 1...8, 9...0x7F, 0x80...0xBF, 0xC0...0xFF`

			`// codes 1...8 mean copy that many bytes; for accented chars & binary`
			`if (c > 0 && c < 9)`
			`while (c--)`
			`out_buf[i++] = in_buf[j++];`

			`// codes 0, 9...0x7F represent themselves`
			`else if (c < 0x80)`
			`out_buf[i++] = c;`

			`// codes 0xC0...0xFF represent "space + ascii char"`
			`else if (c >= 0xC0)`
			`out_buf[i++] = ' ', out_buf[i++] = c ^ 0x80;`

			`// codes 0x80...0xBf represent sequences`
			`else`
			`{`
			`int m, n;`

			`c <<= 8;`
			`c += in_buf[j++];`
			`m = (c & 0x3FFF) >> COUNT_BITS;`
			`n = c & ((1 << COUNT_BITS) - 1);`
			`n += 3;`
			`while (n--)`
			`{`
			`out_buf[i] = out_buf[i - m];`
			`i++;`
			`}`
			`}`
			`}`
			`out_buf[i++]='\0';`
			`out_buf[i++]='\0';`
			`delete[]buf;`
			`buf = pOut;`
			`len = i;`

			`isCompressed = false;`
			`return i;`
			`}`

			`unsigned tBuf::DuplicateCR()`
			`{`
			`if (!buf)`
			`return 0;`
			`byte pBuf = new byte[2 len];`

			`unsigned int k, j;`

			`for (j = k = 0; j < len; j++, k++)`
			`{`
			`pBuf[k] = buf[j];`
			`if (pBuf[k] == 0x0A)`
			`pBuf[k++] = 0x0D, pBuf[k] = 0x0A;`
			`}`
			`delete[]buf;`
			`buf = pBuf;`
			`len = k;`
			`return k;`
			`}`



			`// this nasty little beast removes really low ASCII and 0's`
			`// and handles the CR problem`
			`//`
			`// if a cr appears before a lf, then remove the cr`
			`// if a cr appears in isolation, change to a lf`
			`unsigned tBuf::RemoveBinary()`
			`{`
			`if (!buf)`
			`return 0;`
			`byte *in_buf = buf;`
			`byte *out_buf = new byte[len];`

			`unsigned int k, j;`

			`for (j = k = 0; j < len; j++, k++)`
			`{`
			`// copy each byte`
			`out_buf[k] = in_buf[j];`

			`// throw away really low ASCII`
			`if (( /out_buf[k]>=0 && / out_buf[k] < 9))`
			`k--;`

			`// for CR`
			`if (out_buf[k] == 0x0D)`
			`{`
			`// if next is LF, then drop it`
			`if (j < len - 1 && in_buf[j + 1] == 0x0A)`
			`k--;`
			`else // turn it into a LF`
			`out_buf[k] = 0x0A;`
			`}`
			`}`
			`delete[]buf;`
			`buf = out_buf;`
			`len = k;`
			`return k;`
			`}`

			`void tBuf::setText(const byte * text, unsigned txtlen, bool txtcomp)`
			`{`
			`if (buf)`
			`delete[]buf;`
			`buf = 0L;`

			`if (txtlen <= 0)`
			`txtlen = strlen((const char *) text);`
			`len = txtlen;`
			`buf = new byte[len];`

			`memcpy(buf, text, len*sizeof(char));`
			`// strncpy((char ) buf, (const char ) text, len);`
			`isCompressed = txtcomp;`
			`// cout<<"Setting text, compressed="<<txtcomp<<endl;`
			`}`