aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bananeweizen <bananeweizen@gmx.de> 2013-03-02 13:05:12 +0100
committer Bananeweizen <bananeweizen@gmx.de> 2013-03-02 13:05:12 +0100
commit 710d3b5e41ad920519902f828f5a6ccc0a1c3c34 (patch)
tree f1206251a6390a6808950311560d04d0c94c0fe7
parent 40765f11f49f15dd9a0a040b631228b45c7211a5 (diff)
download cgeo-710d3b5e41ad920519902f828f5a6ccc0a1c3c34.zip
cgeo-710d3b5e41ad920519902f828f5a6ccc0a1c3c34.tar.gz
cgeo-710d3b5e41ad920519902f828f5a6ccc0a1c3c34.tar.bz2
new: strip unneeded markup from OC descriptions
-rw-r--r-- main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java 28
-rw-r--r-- tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java 22
2 files changed, 40 insertions, 10 deletions
diff --git a/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java b/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java
index 9a76a96..621032f 100644
--- a/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java
+++ b/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java
@@ -43,8 +43,7 @@ import java.util.regex.Pattern;
public class OC11XMLParser {
- private static final String PARAGRAPH_END = "</p>";
- private static final String PARAGRAPH_BEGIN = "<p>";
+ private static final String[] MARKUP = new String[] { "p", "span" };
private static Pattern STRIP_DATE = Pattern.compile("\\+0([0-9]){1}\\:00");
private static Pattern LOCAL_URL = Pattern.compile("href=\"(.*)\"");
private static final int CACHE_PARSE_LIMIT = 250;
@@ -514,7 +513,7 @@ public class OC11XMLParser {
@Override
public void end(String body) {
final String content = body.trim();
- descHolder.shortDesc = linkify(content);
+ descHolder.shortDesc = linkify(stripMarkup(content));
}
});
@@ -524,7 +523,7 @@ public class OC11XMLParser {
@Override
public void end(String body) {
final String content = body.trim();
- descHolder.desc = linkify(content);
+ descHolder.desc = linkify(stripMarkup(content));
}
});
@@ -733,12 +732,23 @@ public class OC11XMLParser {
* rendering.
*/
protected static String stripMarkup(String input) {
- if (StringUtils.startsWith(input, PARAGRAPH_BEGIN) && StringUtils.endsWith(input, PARAGRAPH_END)) {
- String inner = input.substring(PARAGRAPH_BEGIN.length(), input.length() - PARAGRAPH_END.length());
- if (!inner.contains(PARAGRAPH_BEGIN)) {
- return inner.trim();
+ if (!StringUtils.startsWith(input, "<")) {
+ return input;
+ }
+ String result = input.trim();
+ for (String tagName : MARKUP) {
+ final String startTag = "<" + tagName + ">";
+ if (StringUtils.startsWith(result, startTag)) {
+ final String endTag = "</" + tagName + ">";
+ if (StringUtils.endsWith(result, endTag)) {
+ String inner = result.substring(startTag.length(), result.length() - endTag.length()).trim();
+ String nested = stripMarkup(inner);
+ if (!nested.contains(startTag)) {
+ result = nested;
+ }
+ }
}
}
- return input;
+ return result;
}
} \ No newline at end of file
diff --git a/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java b/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java
index 46c3fd1..b12823a 100644
--- a/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java
+++ b/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java
@@ -82,7 +82,7 @@ public class OCXMLTest extends CGeoTestCase {
public static void testFetchTwiceDuplicatesDescription() {
final String geoCode = "OCEFBA";
- final String description = "<p><span>Bei dem Cache kannst du einen kleinen Schatz bergen. Bitte lege aber einen ander Schatz in das Döschen. Achtung vor Automuggels.</span></p>";
+ final String description = "Bei dem Cache kannst du einen kleinen Schatz bergen. Bitte lege aber einen ander Schatz in das Döschen. Achtung vor Automuggels.";
deleteCacheFromDB(geoCode);
Geocache cache = OCXMLClient.getCache(geoCode);
@@ -100,4 +100,24 @@ public class OCXMLTest extends CGeoTestCase {
deleteCacheFromDB(geoCode);
}
}
+
+ public static void testRemoveMarkupCache() {
+ final String geoCode = "OCEFBA";
+ final String description = "Bei dem Cache kannst du einen kleinen Schatz bergen. Bitte lege aber einen ander Schatz in das Döschen. Achtung vor Automuggels.";
+
+ Geocache cache = OCXMLClient.getCache(geoCode);
+ assertNotNull(cache);
+ assertEquals(description, cache.getDescription());
+ }
+
+ public static void testRemoveMarkup() {
+ assertEquals("", OC11XMLParser.stripMarkup(""));
+ assertEquals("Test", OC11XMLParser.stripMarkup("Test"));
+ assertEquals("<b>bold and others not removed</b>", OC11XMLParser.stripMarkup("<b>bold and others not removed</b>"));
+ assertEquals("unnecessary paragraph", OC11XMLParser.stripMarkup("<p>unnecessary paragraph</p>"));
+ assertEquals("unnecessary span", OC11XMLParser.stripMarkup("<span>unnecessary span</span>"));
+ assertEquals("nested", OC11XMLParser.stripMarkup("<span><span>nested</span></span>"));
+ assertEquals("mixed", OC11XMLParser.stripMarkup("<span> <p> mixed </p> </span>"));
+ assertEquals("<p>not</p><p>removable</p>", OC11XMLParser.stripMarkup("<p>not</p><p>removable</p>"));
+ }
}