Commit ad8fe4b5, authored by Florian Schmaus, committed by flow

Make illegal character detection even stricter

The previous implementation allowed high surrogates to be followed by
non-low surrogates. We now ensure that every high surrogate is followed
by a low surrogate (under the assumption that the last byte in an XMPP
stream cannot be a Unicode surrogate pair).

OF-391

git-svn-id: http://svn.igniterealtime.org/svn/repos/openfire/trunk@13996 b35dd754-fafc-0310-a699-88a17e54d16e
parent c0a4fc18
...@@ -370,12 +370,12 @@ public class MXParser extends org.xmlpull.mxp1.MXParser { ...@@ -370,12 +370,12 @@ public class MXParser extends org.xmlpull.mxp1.MXParser {
* Note that when MXParser is being modified to handle multibyte chars correctly, this method needs to change (as * Note that when MXParser is being modified to handle multibyte chars correctly, this method needs to change (as
* then, there are more codepoints to check). * then, there are more codepoints to check).
* *
* Also note that the current implementations allows high surrogates followed by non low surrogates.
*/ */
@Override @Override
protected char more() throws IOException, XmlPullParserException { protected char more() throws IOException, XmlPullParserException {
final char codePoint = super.more(); // note - this does NOT return a codepoint now, but simply a (single byte) character! final char codePoint = super.more(); // note - this does NOT return a codepoint now, but simply a (double byte) character!
boolean validCodepoint = false; boolean validCodepoint = false;
boolean isLowSurrogate = Character.isLowSurrogate(codePoint);
if ((codePoint == 0x0) || // 0x0 is not allowed, but flash clients insist on sending this as the very first character of a stream. We should stop allowing this codepoint after the first byte has been parsed. if ((codePoint == 0x0) || // 0x0 is not allowed, but flash clients insist on sending this as the very first character of a stream. We should stop allowing this codepoint after the first byte has been parsed.
(codePoint == 0x9) || (codePoint == 0x9) ||
(codePoint == 0xA) || (codePoint == 0xA) ||
...@@ -384,12 +384,15 @@ public class MXParser extends org.xmlpull.mxp1.MXParser { ...@@ -384,12 +384,15 @@ public class MXParser extends org.xmlpull.mxp1.MXParser {
((codePoint >= 0xE000) && (codePoint <= 0xFFFD))) { ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))) {
validCodepoint = true; validCodepoint = true;
} }
else if (Character.isLowSurrogate(codePoint)) { else if (highSurrogateSeen) {
if (highSurrogateSeen) { if (isLowSurrogate) {
validCodepoint = true; validCodepoint = true;
} else { } else {
throw new XmlPullParserException("Low surrogate '0x " + String.format("%x", (int) codePoint) + " without preceeding high surrogate"); throw new XmlPullParserException("High surrogate followed by non low surrogate '0x" + String.format("%x", (int) codePoint) + "'");
}
} }
else if (isLowSurrogate) {
throw new XmlPullParserException("Low surrogate '0x " + String.format("%x", (int) codePoint) + " without preceeding high surrogate");
} }
else if (Character.isHighSurrogate(codePoint)) { else if (Character.isHighSurrogate(codePoint)) {
highSurrogateSeen = true; highSurrogateSeen = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment