Commit c0a4fc18 authored by Florian Schmaus's avatar Florian Schmaus Committed by flow

Fix broken codePoint check (OF-391)

char in java is 16-bit. Therefore checking for anything greater then
0xFFFF makes no sense. We want to know here if the character is maybe
part of a surrogate pair, that's why 'is(High|Low)Surrogate(char)' is
now used.

git-svn-id: http://svn.igniterealtime.org/svn/repos/openfire/trunk@13995 b35dd754-fafc-0310-a699-88a17e54d16e
parent 1f6ce239
...@@ -362,25 +362,45 @@ public class MXParser extends org.xmlpull.mxp1.MXParser { ...@@ -362,25 +362,45 @@ public class MXParser extends org.xmlpull.mxp1.MXParser {
inputEncoding = oldEncoding; inputEncoding = oldEncoding;
} }
private boolean highSurrogateSeen = false;
/** /**
* Makes sure that each individual character is a valid XML character. * Makes sure that each individual character is a valid XML character.
* *
* Note that when MXParser is being modified to handle multibyte chars correctly, this method needs to change (as * Note that when MXParser is being modified to handle multibyte chars correctly, this method needs to change (as
* then, there are more codepoints to check). * then, there are more codepoints to check).
*
* Also note that the current implementations allows high surrogates followed by non low surrogates.
*/ */
@Override @Override
protected char more() throws IOException, XmlPullParserException { protected char more() throws IOException, XmlPullParserException {
final char codePoint = super.more(); // note - this does NOT return a codepoint now, but simply a (single byte) character! final char codePoint = super.more(); // note - this does NOT return a codepoint now, but simply a (single byte) character!
boolean validCodepoint = false;
if ((codePoint == 0x0) || // 0x0 is not allowed, but flash clients insist on sending this as the very first character of a stream. We should stop allowing this codepoint after the first byte has been parsed. if ((codePoint == 0x0) || // 0x0 is not allowed, but flash clients insist on sending this as the very first character of a stream. We should stop allowing this codepoint after the first byte has been parsed.
(codePoint == 0x9) || (codePoint == 0x9) ||
(codePoint == 0xA) || (codePoint == 0xA) ||
(codePoint == 0xD) || (codePoint == 0xD) ||
((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) ||
((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD))) {
((codePoint >= 0x10000) && (codePoint <= 0x10FFFF))) { validCodepoint = true;
}
else if (Character.isLowSurrogate(codePoint)) {
if (highSurrogateSeen) {
validCodepoint = true;
} else {
throw new XmlPullParserException("Low surrogate '0x " + String.format("%x", (int) codePoint) + " without preceeding high surrogate");
}
}
else if (Character.isHighSurrogate(codePoint)) {
highSurrogateSeen = true;
// Return here so that highSurrogateSeen is not reset
return codePoint; return codePoint;
} }
// Always reset high surrogate seen
highSurrogateSeen = false;
if (validCodepoint)
return codePoint;
throw new XmlPullParserException("Illegal XML character: " + Integer.parseInt(codePoint+"", 16)); throw new XmlPullParserException("Illegal XML character '0x" + String.format("%x", (int) codePoint) + "'");
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment