You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
rosegarden/src/base/XmlExportable.cpp

197 lines
4.9 KiB

/*
Rosegarden
A sequencer and musical notation editor.
This program is Copyright 2000-2008
Guillaume Laurent <glaurent@telegraph-road.org>,
Chris Cannam <cannam@all-day-breakfast.com>,
Richard Bown <bownie@bownie.com>
The moral right of the authors to claim authorship of this work
has been asserted.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "XmlExportable.h"
#include <iostream>
#include <cstdlib>
#include <cstring>
namespace Rosegarden
{
static std::string s1;
static std::string multibyte;
std::string XmlExportable::encode(const std::string &s0)
{
static char *buffer = 0;
static size_t bufsiz = 0;
size_t buflen = 0;
static char multibyte[20];
size_t mblen = 0;
size_t len = s0.length();
if (bufsiz < len * 2 + 10) {
bufsiz = len * 2 + 10;
buffer = (char *)malloc(bufsiz);
}
// Escape any xml special characters, and also make sure we have
// valid utf8 -- otherwise we won't be able to re-read the xml.
// Amazing how complicated this gets.
bool warned = false; // no point in warning forever for long bogus strings
for (size_t i = 0; i < len; ++i) {
unsigned char c = s0[i];
if (((c & 0xc0) == 0xc0) || !(c & 0x80)) {
// 11xxxxxx or 0xxxxxxx: first byte of a character sequence
if (mblen > 0) {
// does multibyte contain a valid sequence?
unsigned int length =
(!(multibyte[0] & 0x20)) ? 2 :
(!(multibyte[0] & 0x10)) ? 3 :
(!(multibyte[0] & 0x08)) ? 4 :
(!(multibyte[0] & 0x04)) ? 5 : 0;
if (length == 0 || mblen == length) {
if (bufsiz < buflen + mblen + 1) {
bufsiz = 2 * buflen + mblen + 1;
buffer = (char *)realloc(buffer, bufsiz);
}
strncpy(buffer + buflen, multibyte, mblen);
buflen += mblen;
} else {
if (!warned) {
std::cerr
<< "WARNING: Invalid utf8 char width in string \""
<< s0 << "\" at index " << i << " ("
<< mblen << " octet"
<< (mblen != 1 ? "s" : "")
<< ", expected " << length << ")" << std::endl;
warned = true;
}
// and drop the character
}
}
mblen = 0;
if (!(c & 0x80)) { // ascii
if (bufsiz < buflen + 10) {
bufsiz = 2 * buflen + 10;
buffer = (char *)realloc(buffer, bufsiz);
}
switch (c) {
case '&' : strncpy(buffer + buflen, "&amp;", 5); buflen += 5; break;
case '<' : strncpy(buffer + buflen, "&lt;", 4); buflen += 4; break;
case '>' : strncpy(buffer + buflen, "&gt;", 4); buflen += 4; break;
case '"' : strncpy(buffer + buflen, "&quot;", 6); buflen += 6; break;
case '\'' : strncpy(buffer + buflen, "&apos;", 6); buflen += 6; break;
case 0x9:
case 0xa:
case 0xd:
// convert these special cases to plain whitespace:
buffer[buflen++] = ' ';
break;
default:
if (c >= 32) buffer[buflen++] = c;
else {
if (!warned) {
std::cerr
<< "WARNING: Invalid utf8 octet in string \""
<< s0 << "\" at index " << i << " ("
<< (int)c << " < 32)" << std::endl;
}
warned = true;
}
}
} else {
// store in multibyte rather than straight to s1, so
// that we know we're in the middle of something
// (below). At this point we know mblen == 0.
multibyte[mblen++] = c;
}
} else {
// second or subsequent byte
if (mblen == 0) { // ... without a first byte!
if (!warned) {
std::cerr
<< "WARNING: Invalid utf8 octet sequence in string \""
<< s0 << "\" at index " << i << std::endl;
warned = true;
}
} else {
if (mblen >= sizeof(multibyte)-1) {
if (!warned) {
std::cerr
<< "WARNING: Character too wide in string \""
<< s0 << "\" at index " << i << " (reached width of "
<< mblen << ")" << std::endl;
}
warned = true;
mblen = 0;
} else {
multibyte[mblen++] = c;
}
}
}
}
if (mblen > 0) {
// does multibyte contain a valid sequence?
unsigned int length =
(!(multibyte[0] & 0x20)) ? 2 :
(!(multibyte[0] & 0x10)) ? 3 :
(!(multibyte[0] & 0x08)) ? 4 :
(!(multibyte[0] & 0x04)) ? 5 : 0;
if (length == 0 || mblen == length) {
if (bufsiz < buflen + mblen + 1) {
bufsiz = 2 * buflen + mblen + 1;
buffer = (char *)realloc(buffer, bufsiz);
}
strncpy(buffer + buflen, multibyte, mblen);
buflen += mblen;
} else {
if (!warned) {
std::cerr
<< "WARNING: Invalid utf8 char width in string \""
<< s0 << "\" at index " << len << " ("
<< mblen << " octet"
<< (mblen != 1 ? "s" : "")
<< ", expected " << length << ")" << std::endl;
warned = true;
}
// and drop the character
}
}
buffer[buflen] = '\0';
return buffer;
}
}