diff options
| author | Bananeweizen <bananeweizen@gmx.de> | 2013-03-02 13:05:12 +0100 |
|---|---|---|
| committer | Bananeweizen <bananeweizen@gmx.de> | 2013-03-02 13:05:12 +0100 |
| commit | 710d3b5e41ad920519902f828f5a6ccc0a1c3c34 (patch) | |
| tree | f1206251a6390a6808950311560d04d0c94c0fe7 | |
| parent | 40765f11f49f15dd9a0a040b631228b45c7211a5 (diff) | |
| download | cgeo-710d3b5e41ad920519902f828f5a6ccc0a1c3c34.zip cgeo-710d3b5e41ad920519902f828f5a6ccc0a1c3c34.tar.gz cgeo-710d3b5e41ad920519902f828f5a6ccc0a1c3c34.tar.bz2 | |
new: strip unneeded markup from OC descriptions
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java | 28 |
| -rw-r--r-- | tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java | 22 |
2 files changed, 40 insertions, 10 deletions
diff --git a/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java b/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java index 9a76a96..621032f 100644 --- a/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java +++ b/main/src/cgeo/geocaching/connector/oc/OC11XMLParser.java @@ -43,8 +43,7 @@ import java.util.regex.Pattern; public class OC11XMLParser { - private static final String PARAGRAPH_END = "</p>"; - private static final String PARAGRAPH_BEGIN = "<p>"; + private static final String[] MARKUP = new String[] { "p", "span" }; private static Pattern STRIP_DATE = Pattern.compile("\\+0([0-9]){1}\\:00"); private static Pattern LOCAL_URL = Pattern.compile("href=\"(.*)\""); private static final int CACHE_PARSE_LIMIT = 250; @@ -514,7 +513,7 @@ public class OC11XMLParser { @Override public void end(String body) { final String content = body.trim(); - descHolder.shortDesc = linkify(content); + descHolder.shortDesc = linkify(stripMarkup(content)); } }); @@ -524,7 +523,7 @@ public class OC11XMLParser { @Override public void end(String body) { final String content = body.trim(); - descHolder.desc = linkify(content); + descHolder.desc = linkify(stripMarkup(content)); } }); @@ -733,12 +732,23 @@ public class OC11XMLParser { * rendering. 
*/ protected static String stripMarkup(String input) { - if (StringUtils.startsWith(input, PARAGRAPH_BEGIN) && StringUtils.endsWith(input, PARAGRAPH_END)) { - String inner = input.substring(PARAGRAPH_BEGIN.length(), input.length() - PARAGRAPH_END.length()); - if (!inner.contains(PARAGRAPH_BEGIN)) { - return inner.trim(); + if (!StringUtils.startsWith(input, "<")) { + return input; + } + String result = input.trim(); + for (String tagName : MARKUP) { + final String startTag = "<" + tagName + ">"; + if (StringUtils.startsWith(result, startTag)) { + final String endTag = "</" + tagName + ">"; + if (StringUtils.endsWith(result, endTag)) { + String inner = result.substring(startTag.length(), result.length() - endTag.length()).trim(); + String nested = stripMarkup(inner); + if (!nested.contains(startTag)) { + result = nested; + } + } } } - return input; + return result; } }
\ No newline at end of file diff --git a/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java b/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java index 46c3fd1..b12823a 100644 --- a/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java +++ b/tests/src/cgeo/geocaching/connector/oc/OCXMLTest.java @@ -82,7 +82,7 @@ public class OCXMLTest extends CGeoTestCase { public static void testFetchTwiceDuplicatesDescription() { final String geoCode = "OCEFBA"; - final String description = "<p><span>Bei dem Cache kannst du einen kleinen Schatz bergen. Bitte lege aber einen ander Schatz in das Döschen. Achtung vor Automuggels.</span></p>"; + final String description = "Bei dem Cache kannst du einen kleinen Schatz bergen. Bitte lege aber einen ander Schatz in das Döschen. Achtung vor Automuggels."; deleteCacheFromDB(geoCode); Geocache cache = OCXMLClient.getCache(geoCode); @@ -100,4 +100,24 @@ public class OCXMLTest extends CGeoTestCase { deleteCacheFromDB(geoCode); } } + + public static void testRemoveMarkupCache() { + final String geoCode = "OCEFBA"; + final String description = "Bei dem Cache kannst du einen kleinen Schatz bergen. Bitte lege aber einen ander Schatz in das Döschen. 
Achtung vor Automuggels."; + + Geocache cache = OCXMLClient.getCache(geoCode); + assertNotNull(cache); + assertEquals(description, cache.getDescription()); + } + + public static void testRemoveMarkup() { + assertEquals("", OC11XMLParser.stripMarkup("")); + assertEquals("Test", OC11XMLParser.stripMarkup("Test")); + assertEquals("<b>bold and others not removed</b>", OC11XMLParser.stripMarkup("<b>bold and others not removed</b>")); + assertEquals("unnecessary paragraph", OC11XMLParser.stripMarkup("<p>unnecessary paragraph</p>")); + assertEquals("unnecessary span", OC11XMLParser.stripMarkup("<span>unnecessary span</span>")); + assertEquals("nested", OC11XMLParser.stripMarkup("<span><span>nested</span></span>")); + assertEquals("mixed", OC11XMLParser.stripMarkup("<span> <p> mixed </p> </span>")); + assertEquals("<p>not</p><p>removable</p>", OC11XMLParser.stripMarkup("<p>not</p><p>removable</p>")); + } } |
