diff options
Diffstat (limited to 'main/src/cgeo/geocaching')
| -rw-r--r-- | main/src/cgeo/geocaching/files/GPXParser.java | 6 | ||||
| -rw-r--r-- | main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java | 98 |
2 files changed, 103 insertions, 1 deletions
diff --git a/main/src/cgeo/geocaching/files/GPXParser.java b/main/src/cgeo/geocaching/files/GPXParser.java index 8d328e4..5553927 100644 --- a/main/src/cgeo/geocaching/files/GPXParser.java +++ b/main/src/cgeo/geocaching/files/GPXParser.java @@ -23,6 +23,7 @@ import cgeo.geocaching.utils.Log; import cgeo.geocaching.utils.MatcherWrapper; import cgeo.geocaching.utils.SynchronizedDateFormat; +import org.apache.commons.lang3.CharEncoding; import org.apache.commons.lang3.StringUtils; import org.xml.sax.Attributes; import org.xml.sax.SAXException; @@ -34,8 +35,10 @@ import android.sax.RootElement; import android.sax.StartElementListener; import android.util.Xml; +import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.text.ParseException; import java.util.ArrayList; import java.util.Collection; @@ -809,7 +812,8 @@ public abstract class GPXParser extends FileParser { try { progressStream = new ProgressInputStream(stream); - Xml.parse(progressStream, Xml.Encoding.UTF_8, root.getContentHandler()); + BufferedReader reader = new BufferedReader(new InputStreamReader(progressStream, CharEncoding.UTF_8)); + Xml.parse(new InvalidXMLCharacterFilterReader(reader), root.getContentHandler()); return DataStore.loadCaches(result, EnumSet.of(LoadFlag.LOAD_DB_MINIMAL)); } catch (final SAXException e) { throw new ParserException("Cannot parse .gpx file as GPX " + version + ": could not parse XML", e); diff --git a/main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java b/main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java new file mode 100644 index 0000000..a7a3e1b --- /dev/null +++ b/main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java @@ -0,0 +1,98 @@ +package cgeo.geocaching.files;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Filter reader which removes characters and numeric character references that XML 1.0 does
+ * not allow, so that a strict parser does not abort on them.
+ * <p>
+ * Removed are:
+ * <ul>
+ * <li>literal characters outside the XML 1.0 {@code Char} production (most control characters,
+ * U+FFFE and U+FFFF),</li>
+ * <li>surrogate code units that are not part of a high/low pair,</li>
+ * <li>well-formed numeric references such as {@code &#1;} or {@code &#x1F;} whose value is not
+ * an allowed character.</li>
+ * </ul>
+ * Everything else, including named entities and malformed or unterminated references, is passed
+ * through unchanged.
+ * <p>
+ * The reader keeps its own read-ahead buffer, so references and surrogate pairs are recognised
+ * even when they are split between two reads of the wrapped reader or two calls of the caller.
+ * Because of that buffer {@link #mark(int)} and {@link #reset()} are not supported. Instances
+ * are not synchronized.
+ */
+public class InvalidXMLCharacterFilterReader extends FilterReader {
+
+    /** Capacity of the internal read-ahead buffer, in chars. */
+    private static final int BUFFER_SIZE = 8192;
+
+    /**
+     * Longest numeric character reference that is examined, counted from {@code '&'} to
+     * {@code ';'} inclusive. Twelve chars cover {@code &#x0010FFFF;}; longer references (more
+     * leading zeros) are passed through untouched.
+     */
+    private static final int MAX_REFERENCE_LENGTH = 12;
+
+    /** Shortest possible numeric character reference, e.g. {@code &#0;}. */
+    private static final int MIN_REFERENCE_LENGTH = 4;
+
+    /** Unfiltered chars read from the wrapped reader; valid data is [bufferPos, bufferEnd). */
+    private final char[] buffer = new char[BUFFER_SIZE];
+    private int bufferPos;
+    private int bufferEnd;
+
+    /** Set once the wrapped reader has reported end of stream. */
+    private boolean endOfInput;
+
+    /** True while the last char handed out was a high surrogate whose low half is still due. */
+    private boolean lowSurrogateExpected;
+
+    public InvalidXMLCharacterFilterReader(Reader in) {
+        super(in);
+    }
+
+    /**
+     * Reads a single filtered char. {@link FilterReader#read()} would hand out the next raw char
+     * of the wrapped reader, so it is redirected to the filtering array method.
+     *
+     * @return the next valid char, or <code>-1</code> at end of stream
+     */
+    @Override
+    public int read() throws IOException {
+        final char[] single = new char[1];
+        return read(single, 0, 1) == -1 ? -1 : single[0];
+    }
+
+    /**
+     * Fills the given range with filtered chars. For a non-empty range the call blocks until at
+     * least one valid char is available, so it never returns <code>0</code> merely because all
+     * chars read so far had to be dropped.
+     *
+     * @param cbuf
+     *            destination buffer
+     * @param off
+     *            index of the first char to write
+     * @param len
+     *            maximum number of chars to write
+     * @return number of valid chars stored, <code>0</code> if <code>len</code> is 0, or
+     *         <code>-1</code> if the end of the underlying reader was reached
+     * @throws IndexOutOfBoundsException
+     *             if <code>off</code>/<code>len</code> do not describe a range inside
+     *             <code>cbuf</code>
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        if (off < 0 || len < 0 || len > cbuf.length - off) {
+            throw new IndexOutOfBoundsException();
+        }
+        if (len == 0) {
+            return 0;
+        }
+        int written = 0;
+        while (written < len) {
+            // Only wait for fresh input while there is nothing to hand back yet.
+            if (bufferPos == bufferEnd && (written > 0 || !fill(1))) {
+                break;
+            }
+            final char c = buffer[bufferPos];
+            if (c == '&') {
+                final int forbidden = forbiddenReferenceLength();
+                if (forbidden > 0) {
+                    // drop the complete reference, from '&' to ';'
+                    bufferPos += forbidden;
+                    continue;
+                }
+            }
+            final boolean keep;
+            if (Character.isHighSurrogate(c)) {
+                // fill() may move the buffered data, hence bufferPos is re-read afterwards
+                keep = fill(2) && Character.isLowSurrogate(buffer[bufferPos + 1]);
+            } else if (Character.isLowSurrogate(c)) {
+                keep = lowSurrogateExpected;
+            } else {
+                keep = isValidXMLChar(c);
+            }
+            lowSurrogateExpected = keep && Character.isHighSurrogate(c);
+            if (keep) {
+                cbuf[off + written] = c;
+                written++;
+            }
+            bufferPos++;
+        }
+        return written > 0 ? written : -1;
+    }
+
+    /**
+     * Skips filtered chars by reading and discarding them, which keeps the internal buffer and
+     * the wrapped reader consistent.
+     *
+     * @return number of valid chars actually skipped
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        if (n < 0) {
+            throw new IllegalArgumentException("skip value is negative");
+        }
+        final char[] discarded = new char[(int) Math.min(n, BUFFER_SIZE)];
+        long remaining = n;
+        while (remaining > 0) {
+            final int count = read(discarded, 0, (int) Math.min(remaining, discarded.length));
+            if (count < 0) {
+                break;
+            }
+            remaining -= count;
+        }
+        return n - remaining;
+    }
+
+    /**
+     * Best-effort readiness check: reports buffered or pending raw input. A following read may
+     * still have to wait if all of that input turns out to be invalid.
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return bufferPos < bufferEnd || in.ready();
+    }
+
+    /** Marking is refused because the wrapped reader is read ahead of the caller. */
+    @Override
+    public boolean markSupported() {
+        return false;
+    }
+
+    @Override
+    public void mark(int readAheadLimit) throws IOException {
+        throw new IOException("mark() not supported");
+    }
+
+    @Override
+    public void reset() throws IOException {
+        throw new IOException("reset() not supported");
+    }
+
+    /**
+     * Tries to have at least <code>needed</code> unprocessed chars in the internal buffer,
+     * moving the remaining chars to the front and reading from the wrapped reader as required.
+     *
+     * @param needed
+     *            wanted number of chars, must be smaller than the buffer capacity
+     * @return <code>true</code> if that many chars are available, <code>false</code> if the end
+     *         of the stream was reached first
+     */
+    private boolean fill(int needed) throws IOException {
+        while (bufferEnd - bufferPos < needed && !endOfInput) {
+            if (bufferPos > 0) {
+                System.arraycopy(buffer, bufferPos, buffer, 0, bufferEnd - bufferPos);
+                bufferEnd -= bufferPos;
+                bufferPos = 0;
+            }
+            final int count = in.read(buffer, bufferEnd, buffer.length - bufferEnd);
+            if (count < 0) {
+                endOfInput = true;
+            } else {
+                bufferEnd += count;
+            }
+        }
+        return bufferEnd - bufferPos >= needed;
+    }
+
+    /**
+     * Examines the text at the current buffer position, which must be an ampersand.
+     *
+     * @return length of the reference (including <code>'&amp;'</code> and <code>';'</code>) if
+     *         it is a well-formed numeric character reference to a character XML does not
+     *         allow, otherwise <code>0</code>
+     */
+    private int forbiddenReferenceLength() throws IOException {
+        fill(MAX_REFERENCE_LENGTH);
+        final int available = Math.min(bufferEnd - bufferPos, MAX_REFERENCE_LENGTH);
+        if (available < MIN_REFERENCE_LENGTH || buffer[bufferPos + 1] != '#') {
+            return 0;
+        }
+        final int limit = bufferPos + available;
+        int index = bufferPos + 2;
+        int radix = 10;
+        if (buffer[index] == 'x') {
+            radix = 16;
+            index++;
+        }
+        int value = 0;
+        int digits = 0;
+        while (index < limit && buffer[index] != ';') {
+            final int digit = asciiDigit(buffer[index], radix);
+            if (digit < 0) {
+                // not a numeric reference after all, leave it to the parser
+                return 0;
+            }
+            // clamp instead of overflowing, anything above the maximum is invalid anyway
+            value = Math.min(value * radix + digit, Character.MAX_CODE_POINT + 1);
+            digits++;
+            index++;
+        }
+        if (index >= limit || digits == 0 || isValidXMLChar(value)) {
+            return 0;
+        }
+        return index - bufferPos + 1;
+    }
+
+    /**
+     * @return value of the ASCII digit <code>c</code> in the given radix (10 or 16), or
+     *         <code>-1</code> if it is no such digit
+     */
+    private static int asciiDigit(char c, int radix) {
+        final int digit;
+        if (c >= '0' && c <= '9') {
+            digit = c - '0';
+        } else if (c >= 'a' && c <= 'f') {
+            digit = c - 'a' + 10;
+        } else if (c >= 'A' && c <= 'F') {
+            digit = c - 'A' + 10;
+        } else {
+            return -1;
+        }
+        return digit < radix ? digit : -1;
+    }
+
+    /**
+     * @param codePoint
+     *            Unicode code point (not a UTF-16 code unit)
+     * @return <code>true</code> if the code point matches the XML 1.0 <code>Char</code>
+     *         production
+     */
+    private static boolean isValidXMLChar(int codePoint) {
+        return codePoint == 0x9
+                || codePoint == 0xA
+                || codePoint == 0xD
+                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
+                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
+                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
+    }
+}
\ No newline at end of file |
