aboutsummaryrefslogtreecommitdiffstats
path: root/main/src/cgeo/geocaching/files
diff options
context:
space:
mode:
Diffstat (limited to 'main/src/cgeo/geocaching/files')
-rw-r--r--main/src/cgeo/geocaching/files/GPXParser.java6
-rw-r--r--main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java98
2 files changed, 103 insertions, 1 deletions
diff --git a/main/src/cgeo/geocaching/files/GPXParser.java b/main/src/cgeo/geocaching/files/GPXParser.java
index 8d328e4..5553927 100644
--- a/main/src/cgeo/geocaching/files/GPXParser.java
+++ b/main/src/cgeo/geocaching/files/GPXParser.java
@@ -23,6 +23,7 @@ import cgeo.geocaching.utils.Log;
import cgeo.geocaching.utils.MatcherWrapper;
import cgeo.geocaching.utils.SynchronizedDateFormat;
+import org.apache.commons.lang3.CharEncoding;
import org.apache.commons.lang3.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
@@ -34,8 +35,10 @@ import android.sax.RootElement;
import android.sax.StartElementListener;
import android.util.Xml;
+import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
@@ -809,7 +812,8 @@ public abstract class GPXParser extends FileParser {
try {
progressStream = new ProgressInputStream(stream);
- Xml.parse(progressStream, Xml.Encoding.UTF_8, root.getContentHandler());
+ BufferedReader reader = new BufferedReader(new InputStreamReader(progressStream, CharEncoding.UTF_8));
+ Xml.parse(new InvalidXMLCharacterFilterReader(reader), root.getContentHandler());
return DataStore.loadCaches(result, EnumSet.of(LoadFlag.LOAD_DB_MINIMAL));
} catch (final SAXException e) {
throw new ParserException("Cannot parse .gpx file as GPX " + version + ": could not parse XML", e);
diff --git a/main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java b/main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java
new file mode 100644
index 0000000..a7a3e1b
--- /dev/null
+++ b/main/src/cgeo/geocaching/files/InvalidXMLCharacterFilterReader.java
@@ -0,0 +1,98 @@
+package cgeo.geocaching.files;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Filter reader which can filter out invalid XML characters and character references.
+ *
+ */
+public class InvalidXMLCharacterFilterReader extends FilterReader
+{
+
+    /**
+     * Creates a filter on top of the given reader.
+     *
+     * @param in
+     *            the reader whose content is filtered
+     */
+    public InvalidXMLCharacterFilterReader(Reader in) {
+        super(in);
+    }
+
+    /**
+     * Reads a single character through the filter. {@link FilterReader#read()}
+     * forwards directly to the wrapped reader, so it has to be overridden here
+     * to make sure invalid characters are dropped as well. Character
+     * references cannot be recognized one character at a time and are
+     * therefore not removed by this overload.
+     *
+     * @return The next valid character or <code>-1</code> if the end of the
+     *         underlying reader was reached.
+     */
+    @Override
+    public int read() throws IOException {
+        final char[] single = new char[1];
+        return read(single, 0, 1) == -1 ? -1 : single[0];
+    }
+
+    /**
+     * Reads characters into the buffer and removes everything that is not
+     * allowed in XML: invalid characters themselves and numeric
+     * character references (<code>&amp;#n;</code> or <code>&amp;#xh;</code>)
+     * which resolve to an invalid character. <br />
+     * Valid characters are shifted to the left, so after the last valid
+     * character there may be some unused chars in the buffer. A chunk which
+     * contains no valid character at all is skipped and the next chunk is
+     * read, so <code>0</code> is only returned if the underlying reader
+     * returned <code>0</code> itself. <br />
+     * A character reference which is split across two calls of this method is
+     * not detected.
+     *
+     * @return Number of read valid characters or <code>-1</code> if end of the
+     *         underlying reader was reached.
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        while (true) {
+            final int read = super.read(cbuf, off, len);
+            // end of stream (-1) or nothing requested/delivered (0)
+            if (read <= 0) {
+                return read;
+            }
+            final int kept = filter(cbuf, off, read);
+            if (kept > 0) {
+                return kept;
+            }
+            // everything was filtered out, try the next chunk
+        }
+    }
+
+    /**
+     * Compacts the valid characters of <code>cbuf[off .. off + count)</code>
+     * to the start of that range.
+     *
+     * @return Number of characters kept.
+     */
+    private static int filter(char[] cbuf, int off, int count) {
+        final int end = off + count;
+        int writePos = off;
+        // position of the last unterminated '&' in the already written output, or -1
+        int refStart = -1;
+        for (int readPos = off; readPos < end; readPos++) {
+            final char c = cbuf[readPos];
+            // Surrogate code units are passed through unchanged (pairing is not
+            // verified), so that supplementary characters are not destroyed.
+            final boolean surrogate = Character.isHighSurrogate(c) || Character.isLowSurrogate(c);
+            if (!surrogate && !isValidXMLChar(c)) {
+                continue;
+            }
+            cbuf[writePos++] = c;
+            if (c == '&') {
+                refStart = writePos - 1;
+            } else if (c == ';' && refStart >= 0) {
+                // Inspect the output side of the buffer: the input side may
+                // already have been overwritten by the compaction.
+                if (isInvalidCharacterReference(cbuf, refStart, writePos)) {
+                    writePos = refStart;
+                }
+                refStart = -1;
+            }
+        }
+        return writePos - off;
+    }
+
+    /**
+     * Checks whether <code>buf[start .. end)</code>, which starts with
+     * <code>&amp;</code> and ends with <code>;</code>, is a numeric character
+     * reference to a character which is not allowed in XML. Anything which is
+     * not a well formed numeric reference is left alone.
+     */
+    private static boolean isInvalidCharacterReference(char[] buf, int start, int end) {
+        final int last = end - 1;
+        // shortest possible reference is "&#d;"
+        if (last - start < 3 || buf[start + 1] != '#') {
+            return false;
+        }
+        int pos = start + 2;
+        int radix = 10;
+        if (buf[pos] == 'x') {
+            radix = 16;
+            pos++;
+        }
+        if (pos == last) {
+            return false;
+        }
+        int value = 0;
+        for (; pos < last; pos++) {
+            final char ch = buf[pos];
+            final int digit = ch < 0x80 ? Character.digit(ch, radix) : -1;
+            if (digit < 0) {
+                return false;
+            }
+            value = value * radix + digit;
+            // clamp to avoid integer overflow on absurdly long digit sequences
+            if (value > Character.MAX_CODE_POINT) {
+                value = Character.MAX_CODE_POINT + 1;
+            }
+        }
+        return !isValidXMLChar(value);
+    }
+
+    /**
+     * Checks a code point against the ranges accepted as XML characters:
+     * tab, line feed, carriage return, 0x20-0xD7FF, 0xE000-0xFFFD and
+     * 0x10000-0x10FFFF.
+     */
+    private static boolean isValidXMLChar(int c) {
+        return (c == 0x9) ||
+                (c == 0xA) ||
+                (c == 0xD) ||
+                ((c >= 0x20) && (c <= 0xD7FF)) ||
+                ((c >= 0xE000) && (c <= 0xFFFD)) ||
+                ((c >= 0x10000) && (c <= 0x10FFFF));
+    }
+}
\ No newline at end of file