You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
197 lines
4.9 KiB
197 lines
4.9 KiB
/*
|
|
Rosegarden
|
|
A sequencer and musical notation editor.
|
|
|
|
This program is Copyright 2000-2008
|
|
Guillaume Laurent <glaurent@telegraph-road.org>,
|
|
Chris Cannam <cannam@all-day-breakfast.com>,
|
|
Richard Bown <bownie@bownie.com>
|
|
|
|
The moral right of the authors to claim authorship of this work
|
|
has been asserted.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version. See the file
|
|
COPYING included with this distribution for more information.
|
|
*/
|
|
|
|
#include "XmlExportable.h"
|
|
#include <iostream>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
|
|
namespace Rosegarden
|
|
{
|
|
|
|
static std::string s1;
|
|
static std::string multibyte;
|
|
|
|
std::string XmlExportable::encode(const std::string &s0)
|
|
{
|
|
static char *buffer = 0;
|
|
static size_t bufsiz = 0;
|
|
size_t buflen = 0;
|
|
|
|
static char multibyte[20];
|
|
size_t mblen = 0;
|
|
|
|
size_t len = s0.length();
|
|
|
|
if (bufsiz < len * 2 + 10) {
|
|
bufsiz = len * 2 + 10;
|
|
buffer = (char *)malloc(bufsiz);
|
|
}
|
|
|
|
// Escape any xml special characters, and also make sure we have
|
|
// valid utf8 -- otherwise we won't be able to re-read the xml.
|
|
// Amazing how complicated this gets.
|
|
|
|
bool warned = false; // no point in warning forever for long bogus strings
|
|
|
|
for (size_t i = 0; i < len; ++i) {
|
|
|
|
unsigned char c = s0[i];
|
|
|
|
if (((c & 0xc0) == 0xc0) || !(c & 0x80)) {
|
|
|
|
// 11xxxxxx or 0xxxxxxx: first byte of a character sequence
|
|
|
|
if (mblen > 0) {
|
|
|
|
// does multibyte contain a valid sequence?
|
|
unsigned int length =
|
|
(!(multibyte[0] & 0x20)) ? 2 :
|
|
(!(multibyte[0] & 0x10)) ? 3 :
|
|
(!(multibyte[0] & 0x08)) ? 4 :
|
|
(!(multibyte[0] & 0x04)) ? 5 : 0;
|
|
|
|
if (length == 0 || mblen == length) {
|
|
if (bufsiz < buflen + mblen + 1) {
|
|
bufsiz = 2 * buflen + mblen + 1;
|
|
buffer = (char *)realloc(buffer, bufsiz);
|
|
}
|
|
strncpy(buffer + buflen, multibyte, mblen);
|
|
buflen += mblen;
|
|
} else {
|
|
if (!warned) {
|
|
std::cerr
|
|
<< "WARNING: Invalid utf8 char width in string \""
|
|
<< s0 << "\" at index " << i << " ("
|
|
<< mblen << " octet"
|
|
<< (mblen != 1 ? "s" : "")
|
|
<< ", expected " << length << ")" << std::endl;
|
|
warned = true;
|
|
}
|
|
// and drop the character
|
|
}
|
|
}
|
|
|
|
mblen = 0;
|
|
|
|
if (!(c & 0x80)) { // ascii
|
|
|
|
if (bufsiz < buflen + 10) {
|
|
bufsiz = 2 * buflen + 10;
|
|
buffer = (char *)realloc(buffer, bufsiz);
|
|
}
|
|
|
|
switch (c) {
|
|
case '&' : strncpy(buffer + buflen, "&", 5); buflen += 5; break;
|
|
case '<' : strncpy(buffer + buflen, "<", 4); buflen += 4; break;
|
|
case '>' : strncpy(buffer + buflen, ">", 4); buflen += 4; break;
|
|
case '"' : strncpy(buffer + buflen, """, 6); buflen += 6; break;
|
|
case '\'' : strncpy(buffer + buflen, "'", 6); buflen += 6; break;
|
|
case 0x9:
|
|
case 0xa:
|
|
case 0xd:
|
|
// convert these special cases to plain whitespace:
|
|
buffer[buflen++] = ' ';
|
|
break;
|
|
default:
|
|
if (c >= 32) buffer[buflen++] = c;
|
|
else {
|
|
if (!warned) {
|
|
std::cerr
|
|
<< "WARNING: Invalid utf8 octet in string \""
|
|
<< s0 << "\" at index " << i << " ("
|
|
<< (int)c << " < 32)" << std::endl;
|
|
}
|
|
warned = true;
|
|
}
|
|
}
|
|
|
|
} else {
|
|
|
|
// store in multibyte rather than straight to s1, so
|
|
// that we know we're in the middle of something
|
|
// (below). At this point we know mblen == 0.
|
|
multibyte[mblen++] = c;
|
|
}
|
|
|
|
} else {
|
|
|
|
// second or subsequent byte
|
|
|
|
if (mblen == 0) { // ... without a first byte!
|
|
if (!warned) {
|
|
std::cerr
|
|
<< "WARNING: Invalid utf8 octet sequence in string \""
|
|
<< s0 << "\" at index " << i << std::endl;
|
|
warned = true;
|
|
}
|
|
} else {
|
|
|
|
if (mblen >= sizeof(multibyte)-1) {
|
|
if (!warned) {
|
|
std::cerr
|
|
<< "WARNING: Character too wide in string \""
|
|
<< s0 << "\" at index " << i << " (reached width of "
|
|
<< mblen << ")" << std::endl;
|
|
}
|
|
warned = true;
|
|
mblen = 0;
|
|
} else {
|
|
multibyte[mblen++] = c;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (mblen > 0) {
|
|
// does multibyte contain a valid sequence?
|
|
unsigned int length =
|
|
(!(multibyte[0] & 0x20)) ? 2 :
|
|
(!(multibyte[0] & 0x10)) ? 3 :
|
|
(!(multibyte[0] & 0x08)) ? 4 :
|
|
(!(multibyte[0] & 0x04)) ? 5 : 0;
|
|
|
|
if (length == 0 || mblen == length) {
|
|
if (bufsiz < buflen + mblen + 1) {
|
|
bufsiz = 2 * buflen + mblen + 1;
|
|
buffer = (char *)realloc(buffer, bufsiz);
|
|
}
|
|
strncpy(buffer + buflen, multibyte, mblen);
|
|
buflen += mblen;
|
|
} else {
|
|
if (!warned) {
|
|
std::cerr
|
|
<< "WARNING: Invalid utf8 char width in string \""
|
|
<< s0 << "\" at index " << len << " ("
|
|
<< mblen << " octet"
|
|
<< (mblen != 1 ? "s" : "")
|
|
<< ", expected " << length << ")" << std::endl;
|
|
warned = true;
|
|
}
|
|
// and drop the character
|
|
}
|
|
}
|
|
buffer[buflen] = '\0';
|
|
|
|
return buffer;
|
|
}
|
|
|
|
}
|
|
|