websockets: Add UTF-8 encoding support.

This is not completely standard UTF-8 encoding. Only code points 0-255
are encoded, and each is encoded to at most two octets. Since '\x00' is
a WebSockets framing character, it's easier for all parties to encode
zero as '\xc4\x80' (octets 196 and 128), i.e. the UTF-8 encoding of
code point 256.

This means that a random stream will be slightly more than 50% larger
using this encoding scheme: byte values 128-255, plus zero, each take
two octets, while values 1-127 take one. But it's easy CPU-wise for
client and server to decode/encode. This is especially important for
clients written in languages that have weak bitwise operations, like
JavaScript (i.e. the noVNC client).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
pull/1/head
Joel Martin 13 years ago committed by Johannes Schindelin
parent 0860c4951f
commit 430b8f2449

@ -1840,16 +1840,28 @@ rfbProcessClientNormalMessage(rfbClientPtr cl)
char encBuf2[64]; char encBuf2[64];
#ifdef LIBVNCSERVER_WITH_WEBSOCKETS #ifdef LIBVNCSERVER_WITH_WEBSOCKETS
if (cl->webSockets && cl->webSocketsBase64) { if (cl->webSockets) {
/* With Base64 encoding we need at least 4 bytes */
n = recv(cl->sock, encBuf, 4, MSG_PEEK); n = recv(cl->sock, encBuf, 4, MSG_PEEK);
if ((n > 0) && (n < 4)) { if (cl->webSocketsBase64) {
if (encBuf[0] == '\xff') { /* With Base64 encoding we need at least 4 bytes */
/* Make sure we don't miss a client disconnect on an end frame if ((n > 0) && (n < 4)) {
* marker */ if (encBuf[0] == '\xff') {
n = read(cl->sock, encBuf, 1); /* Make sure we don't miss a client disconnect on an end frame
* marker */
n = read(cl->sock, encBuf, 1);
}
return;
}
} else {
/* With UTF-8 encoding we need at least 3 bytes (framing + 1) */
if ((n == 1) || (n == 2)) {
if (encBuf[0] == '\xff') {
/* Make sure we don't miss a client disconnect on an end frame
* marker */
n = read(cl->sock, encBuf, 1);
}
return;
} }
return;
} }
} }
#endif #endif

@ -330,11 +330,12 @@ webSocketsEncode(rfbClientPtr cl, const char *src, int len)
int int
webSocketsDecode(rfbClientPtr cl, char *dst, int len) webSocketsDecode(rfbClientPtr cl, char *dst, int len)
{ {
int retlen = 0, n, i, avail, modlen, needlen; int retlen = 0, n, i, avail, modlen, needlen, actual;
char *buf, *end = NULL; char *buf, *end = NULL;
unsigned char chr; unsigned char chr, chr2;
buf = cl->decodeBuf; buf = cl->decodeBuf;
n = recv(cl->sock, buf, len*2+2, MSG_PEEK); n = recv(cl->sock, buf, len*2+2, MSG_PEEK);
if (n <= 0) { if (n <= 0) {
@ -342,26 +343,36 @@ webSocketsDecode(rfbClientPtr cl, char *dst, int len)
return n; return n;
} }
if (buf[0] == '\xff') {
i = read(cl->sock, buf, 1); /* Consume marker */
buf++;
n--;
}
if (buf[0] == '\x00') {
i = read(cl->sock, buf, 1); /* Consume marker */
buf++;
n--;
}
/* rfbLog(">> webSocketsDecode, len: %d, n: %d\n", len, n); */
end = memchr(buf, '\xff', len*2+2);
if (!end) {
end = buf + n;
}
avail = end - buf;
if (cl->webSocketsBase64) { if (cl->webSocketsBase64) {
/* Base64 encoded WebSockets stream */ /* Base64 encoded WebSockets stream */
if (buf[0] == '\xff') {
i = read(cl->sock, buf, 1); /* Consume marker */
buf++;
n--;
}
if (n == 0) {
errno = EAGAIN;
return -1;
}
if (buf[0] == '\x00') {
i = read(cl->sock, buf, 1); /* Consume marker */
buf++;
n--;
}
if (n == 0) {
errno = EAGAIN;
return -1;
}
/* end = memchr(buf, '\xff', len*2+2); */
end = memchr(buf, '\xff', n);
if (!end) {
end = buf + n;
}
avail = end - buf;
len -= cl->carrylen; len -= cl->carrylen;
/* Determine how much base64 data we need */ /* Determine how much base64 data we need */
@ -396,7 +407,6 @@ webSocketsDecode(rfbClientPtr cl, char *dst, int len)
retlen += n; retlen += n;
/* Consume the data from socket */ /* Consume the data from socket */
/* rfbLog("here1, needlen: %d, n: %d, len: %d\n", needlen, n, len); */
i = read(cl->sock, buf, needlen); i = read(cl->sock, buf, needlen);
cl->carrylen = n - len; cl->carrylen = n - len;
@ -407,42 +417,62 @@ webSocketsDecode(rfbClientPtr cl, char *dst, int len)
} }
} else { } else {
/* UTF-8 encoded WebSockets stream */ /* UTF-8 encoded WebSockets stream */
actual = 0;
for (needlen = 0; needlen < n && actual < len; needlen++) {
chr = buf[needlen];
if ((chr > 0) && (chr < 128)) {
actual++;
} else if ((chr > 127) && (chr < 255)) {
if (needlen + 1 >= n) {
break;
}
needlen++;
actual++;
}
}
if (actual < len) {
errno = EAGAIN;
return -1;
}
/* Consume what we need */
if ((n = read(cl->sock, buf, needlen)) < needlen) {
return n;
}
while (retlen < len) { while (retlen < len) {
chr = *buf; chr = buf[0];
buf += 1; buf += 1;
if (chr < 128) { if (chr == 0) {
/* Begin frame marker, just skip it */
} else if (chr == 255) {
/* End frame marker, just skip it */
} else if (chr < 128) {
dst[retlen++] = chr; dst[retlen++] = chr;
} else { } else {
if (buf >= end) { chr2 = buf[0];
rfbErr("Not enough UTF-8 data to decode\n");
errno = EIO;
return -1;
}
chr = *buf;
buf += 1; buf += 1;
switch (chr) { switch (chr) {
case (unsigned char) '\xc2': case (unsigned char) '\xc2':
dst[retlen++] = chr; dst[retlen++] = chr2;
break; break;
case (unsigned char) '\xc3': case (unsigned char) '\xc3':
dst[retlen++] = chr + 64; dst[retlen++] = chr2 + 64;
break; break;
case (unsigned char) '\xc4': case (unsigned char) '\xc4':
dst[retlen++] = 0; dst[retlen++] = 0;
break; break;
default:
rfbErr("Invalid UTF-8 encoding\n");
errno = EIO;
return -1;
} }
} }
} }
} }
#if 0 /* rfbLog("<< webSocketsDecode, retlen: %d\n", retlen); */
sprintf(debug, "dst:");
for (i = 0; i < retlen; i++) {
sprintf(debug+strlen(debug), "%d,", dst[i]);
}
rfbLog("%s\n", debug);
rfbLog("<< webSocketsDecode, retlen: %d\n", retlen);
#endif
return retlen; return retlen;
} }

Loading…
Cancel
Save