From 5fd70dd8303ac44488fd621e6f2a9b5005ce7158 Mon Sep 17 00:00:00 2001 From: blafoo Date: Mon, 19 Sep 2011 22:03:21 +0200 Subject: Tuned regex for description and hint --- main/src/cgeo/geocaching/Constants.java | 53 +++++++++++ main/src/cgeo/geocaching/cgBase.java | 69 ++++---------- main/src/cgeo/geocaching/utils/BaseUtils.java | 48 ++++++++++ .../cgeo/geocaching/test/RegExPerformanceTest.java | 104 +++++++++++++++++++++ .../src/cgeo/geocaching/test/mock/MockedCache.java | 5 +- 5 files changed, 224 insertions(+), 55 deletions(-) create mode 100644 main/src/cgeo/geocaching/Constants.java create mode 100644 main/src/cgeo/geocaching/utils/BaseUtils.java create mode 100644 tests/src/cgeo/geocaching/test/RegExPerformanceTest.java diff --git a/main/src/cgeo/geocaching/Constants.java b/main/src/cgeo/geocaching/Constants.java new file mode 100644 index 0000000..3c2f74a --- /dev/null +++ b/main/src/cgeo/geocaching/Constants.java @@ -0,0 +1,53 @@ +package cgeo.geocaching; + +import java.util.regex.Pattern; + +public class Constants { + + /** + * For further information about patters have a look at + * http://download.oracle.com/javase/1.4.2/docs/api/java/util/regex/Pattern.html + */ + + /** Search until the start of the next tag. The tag can follow immediately */ + public static final String NEXT_START_TAG = "[^<]*"; + /** Search until the end of the actual tag. The closing tag can follow immediately */ + public static final String NEXT_END_TAG = "[^>]*"; + + /** Search until the start of the next tag. The tag must not follow immediately */ + public static final String NEXT_START_TAG2 = "[^<]+"; + /** Search until the end of the actual tag. The closing tag must not follow immediately */ + public static final String NEXT_END_TAG2 = "[^>]+"; + + /** P tag */ + public static final String TAG_P_START = "

"; + /** Closing P tag **/ + public static final String TAG_P_END = "

"; + /** Search until the next <p> */ + public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START; + /** Search until the next </p> */ + public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END; + + /** strong tag */ + public static final String TAG_STRONG_START = ""; + /** Closing strong tag */ + public static final String TAG_STRONG_END = ""; + /** Search until the next <strong> */ + public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START; + /** Search until the next </strong> */ + public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END; + + /** div tag */ + public static final String TAG_DIV_START = "
"; + /** closing div tag */ + public static final String TAG_DIV_END = "
"; + /** Search until the next <div> */ + public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START; + /** Search until the next </div> */ + public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END; + + public final static Pattern PATTERN_HINT = Pattern.compile("Additional Hints" + Constants.TAG_STRONG_END + "[^\\(]*\\(Encrypt\\)" + Constants.TAG_P_END + + Constants.NEXT_START_TAG + "
(.*?)" + Constants.TAG_DIV_END + Constants.NEXT_START_TAG + "
(.*?)" + Constants.TAG_DIV_END_NEXT + Constants.TAG_P_START_NEXT + Constants.TAG_P_END_NEXT + Constants.TAG_P_START_NEXT + Constants.TAG_STRONG_START_NEXT + "\\W*Additional Hints" + Constants.TAG_STRONG_END); + +} diff --git a/main/src/cgeo/geocaching/cgBase.java b/main/src/cgeo/geocaching/cgBase.java index 328a4d4..f8e0c24 100644 --- a/main/src/cgeo/geocaching/cgBase.java +++ b/main/src/cgeo/geocaching/cgBase.java @@ -8,6 +8,7 @@ import cgeo.geocaching.enumerations.WaypointType; import cgeo.geocaching.files.LocParser; import cgeo.geocaching.geopoint.DistanceParser; import cgeo.geocaching.geopoint.Geopoint; +import cgeo.geocaching.utils.BaseUtils; import cgeo.geocaching.utils.CollectionUtils; import org.apache.commons.lang3.ArrayUtils; @@ -100,10 +101,8 @@ public class cgBase { private final static Pattern patternFoundAlternative = Pattern.compile("
]*>()?([^<]*)(<\\/b>)?<\\/span>", Pattern.CASE_INSENSITIVE); private final static Pattern patternLocation = Pattern.compile("]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); - private final static Pattern patternHint = Pattern.compile("
]*>(.*?)
", Pattern.CASE_INSENSITIVE); private final static Pattern patternPersonalNote = Pattern.compile("

]*>([^<]*)

", Pattern.CASE_INSENSITIVE); private final static Pattern patternDescShort = Pattern.compile("
[^<]*]*>((?:(?![^\\w^<]*
).)*)
[^\\w^<]*
", Pattern.CASE_INSENSITIVE); - private final static Pattern patternDesc = Pattern.compile("]*>" + "(.*)[^<]*
[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints", Pattern.CASE_INSENSITIVE); private final static Pattern patternCountLogs = Pattern.compile("<\\/span>", Pattern.CASE_INSENSITIVE); private final static Pattern patternCountLog = Pattern.compile("src=\"\\/images\\/icons\\/(.+?).gif\"[^>]+> (\\d*[,.]?\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternAttributes = Pattern.compile("

[^<]*]+>\\W*Attributes[^<]*

[^<]*
(([^<]*\"[^\"]+\"[^]*>)+)[^<]* 0) { - cache.geocode = getMatch(matcherGeocode.group(1)); + cache.geocode = BaseUtils.getMatch(matcherGeocode.group(1)); } } catch (Exception e) { // failed to parse cache geocode @@ -1102,7 +1101,7 @@ public class cgBase { try { final Matcher matcherCacheId = patternCacheId.matcher(page); if (matcherCacheId.find() && matcherCacheId.groupCount() > 0) { - cache.cacheId = getMatch(matcherCacheId.group(1)); + cache.cacheId = BaseUtils.getMatch(matcherCacheId.group(1)); } } catch (Exception e) { // failed to parse cache id @@ -1113,7 +1112,7 @@ public class cgBase { try { final Matcher matcherCacheGuid = patternCacheGuid.matcher(page); if (matcherCacheGuid.find() && matcherCacheGuid.groupCount() > 0) { - cache.guid = getMatch(matcherCacheGuid.group(1)); + cache.guid = BaseUtils.getMatch(matcherCacheGuid.group(1)); } } catch (Exception e) { // failed to parse cache guid @@ -1239,7 +1238,7 @@ public class cgBase { try { final Matcher matcherSize = patternSize.matcher(tableInside); if (matcherSize.find() && matcherSize.groupCount() > 0) { - cache.size = CacheSize.FIND_BY_ID.get(getMatch(matcherSize.group(1)).toLowerCase()); + cache.size = CacheSize.FIND_BY_ID.get(BaseUtils.getMatch(matcherSize.group(1)).toLowerCase()); } } catch (Exception e) { // failed to parse size @@ -1274,7 +1273,7 @@ public class cgBase { try { final Matcher matcherLatLon = patternLatLon.matcher(page); if (matcherLatLon.find() && matcherLatLon.groupCount() > 0) { - cache.latlon = getMatch(matcherLatLon.group(2)); // first is + cache.latlon = BaseUtils.getMatch(matcherLatLon.group(2)); // first is Map tmp = cgBase.parseLatlon(cache.latlon); if (tmp.size() > 0) { @@ -1294,7 +1293,7 @@ public class cgBase { try { final Matcher matcherLocation = patternLocation.matcher(page); if (matcherLocation.find() && matcherLocation.groupCount() > 0) { - cache.location = getMatch(matcherLocation.group(1)); + cache.location = BaseUtils.getMatch(matcherLocation.group(1)); } } catch (Exception e) { // failed to parse location @@ -1303,7 +1302,7 @@ public class cgBase { // cache hint try { - final Matcher matcherHint = patternHint.matcher(page); + final Matcher matcherHint = Constants.PATTERN_HINT.matcher(page); if (matcherHint.find() && matcherHint.group(1) != null) { // replace linebreak and paragraph tags String hint = Pattern.compile("<(br|p)[^>]*>").matcher(matcherHint.group(1)).replaceAll("\n"); @@ -1346,7 +1345,7 @@ public class cgBase { try { final Matcher matcherPersonalNote = patternPersonalNote.matcher(page); if (matcherPersonalNote.find() && matcherPersonalNote.groupCount() > 0) { - cache.personalNote = getMatch(matcherPersonalNote.group(1)); + cache.personalNote = BaseUtils.getMatch(matcherPersonalNote.group(1)); } } catch (Exception e) { // failed to parse cache personal note @@ -1357,7 +1356,7 @@ public class cgBase { try { final Matcher matcherDescShort = patternDescShort.matcher(page); if (matcherDescShort.find() && matcherDescShort.groupCount() > 0) { - cache.shortdesc = getMatch(matcherDescShort.group(1)); + cache.shortdesc = BaseUtils.getMatch(matcherDescShort.group(1)); } } catch (Exception e) { // failed to parse short description @@ -1366,9 +1365,9 @@ public class cgBase { // cache description try { - final Matcher matcherDesc = patternDesc.matcher(page); + final Matcher matcherDesc = Constants.PATTERN_DESC.matcher(page); if (matcherDesc.find() && matcherDesc.groupCount() > 0) { - cache.description = getMatch(matcherDesc.group(1)); + cache.description = BaseUtils.getMatch(matcherDesc.group(1)); } } catch (Exception e) { // failed to parse short description @@ -1775,16 +1774,6 @@ public class cgBase { } } - private static String getMatch(String match) { - // creating a new String via String constructor is necessary here!! - return new String(match.trim()); - // Java copies the whole page String, when matching with regular expressions - // later this would block the garbage collector, as we only need tiny parts of the page - // see http://developer.android.com/reference/java/lang/String.html#backing_array - - // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! - } - public Date parseGcCustomDate(String input) throws ParseException { @@ -3745,7 +3734,7 @@ public class cgBase { "GET", new HashMap(), requestId, false, false, false); } else { if (StringUtils.isNotEmpty(buffer)) { - replaceWhitespace(buffer); + BaseUtils.replaceWhitespace(buffer); String data = buffer.toString(); buffer = null; @@ -3766,32 +3755,6 @@ public class cgBase { return response; } - /** - * Replace the characters \n, \r and \t with a space - * - * @param buffer - * The data - */ - public static void replaceWhitespace(final StringBuffer buffer) { - final int length = buffer.length(); - final char[] chars = new char[length]; - buffer.getChars(0, length, chars, 0); - int resultSize = 0; - boolean lastWasWhitespace = false; - for (char c : chars) { - if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { - if (!lastWasWhitespace) { - chars[resultSize++] = ' '; - } - lastWasWhitespace = true; - } else { - chars[resultSize++] = c; - lastWasWhitespace = false; - } - } - buffer.setLength(0); - buffer.append(chars); - } public String requestJSONgc(final URI uri, String params) { int httpCode = -1; @@ -3879,7 +3842,7 @@ public class cgBase { final URI newLocation = uri.resolve(httpLocation); page = requestJSONgc(newLocation, params); } else { - replaceWhitespace(buffer); + BaseUtils.replaceWhitespace(buffer); page = buffer.toString(); } @@ -4029,7 +3992,7 @@ public class cgBase { * } * } else { */ - replaceWhitespace(buffer); + BaseUtils.replaceWhitespace(buffer); page = buffer.toString(); //} @@ -4604,7 +4567,7 @@ public class cgBase { /** * Generate a numeric date and time string according to system-wide settings (locale, * date format) such as "7 sept. à 12:35". - * + * * @param context * a Context * @param date diff --git a/main/src/cgeo/geocaching/utils/BaseUtils.java b/main/src/cgeo/geocaching/utils/BaseUtils.java new file mode 100644 index 0000000..2025765 --- /dev/null +++ b/main/src/cgeo/geocaching/utils/BaseUtils.java @@ -0,0 +1,48 @@ +/** + * + */ +package cgeo.geocaching.utils; + +/** + * Misc. utils + */ +public final class BaseUtils { + + /** + * Replace the characters \n, \r and \t with a space + * + * @param buffer + * The data + */ + public static void replaceWhitespace(final StringBuffer buffer) { + final int length = buffer.length(); + final char[] chars = new char[length]; + buffer.getChars(0, length, chars, 0); + int resultSize = 0; + boolean lastWasWhitespace = false; + for (char c : chars) { + if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { + if (!lastWasWhitespace) { + chars[resultSize++] = ' '; + } + lastWasWhitespace = true; + } else { + chars[resultSize++] = c; + lastWasWhitespace = false; + } + } + buffer.setLength(0); + buffer.append(chars); + } + + public static String getMatch(String match) { + // creating a new String via String constructor is necessary here!! + return new String(match.trim()); + // Java copies the whole page String, when matching with regular expressions + // later this would block the garbage collector, as we only need tiny parts of the page + // see http://developer.android.com/reference/java/lang/String.html#backing_array + + // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! + } + +} diff --git a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java new file mode 100644 index 0000000..277ec32 --- /dev/null +++ b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java @@ -0,0 +1,104 @@ +package cgeo.geocaching.test; + +import cgeo.geocaching.Constants; +import cgeo.geocaching.test.mock.GC1ZXX2; +import cgeo.geocaching.test.mock.GC2CJPF; +import cgeo.geocaching.test.mock.MockedCache; +import cgeo.geocaching.utils.BaseUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +/** + * Test class to compare the performance of two regular expressions on given data. + * Can be used to improve the time needed to parse the cache data + * Run As "JUnit Test" + * + * @author blafoo + */ +public class RegExPerformanceTest extends TestCase { + + // Regular expression: "" + // Input string 1: "" + // Input string 2: "" will handle a large, non-matching string almost a hundred times faster then the previous one! + + private final static Pattern PATTERN_ACTUAL = Pattern.compile("
]*>(.*?)
", Pattern.CASE_INSENSITIVE); + + private static final Pattern PATTERN_IMPROVED = Pattern.compile( + "Additional Hints" + Constants.TAG_STRONG_END + + "[^\\(]*\\(Encrypt\\)" + Constants.TAG_P_END + + Constants.NEXT_START_TAG + "
(.*?)" + Constants.TAG_DIV_END + Constants.NEXT_START_TAG + "
= group && matcherHint.group(group) != null) { + // replace linebreak and paragraph tags + String hint = Pattern.compile("<(br|p)" + Constants.NEXT_END_TAG + ">").matcher(matcherHint.group(group)).replaceAll("\n"); + if (hint != null) { + result = hint.replaceAll(Pattern.quote(Constants.TAG_P_END), "").trim(); + } + } + return result; + } + + private String parseDescription(String data, Pattern p, int group) { + String result = null; + final Matcher matcher = p.matcher(data); + if (matcher.find() && matcher.groupCount() >= group) { + result = BaseUtils.getMatch(matcher.group(group)); + } + return result; + } + + + public void testRegEx() { + + List cachesForParsing = new ArrayList(); + cachesForParsing.add(new GC2CJPF()); + cachesForParsing.add(new GC1ZXX2()); + + int ITERATIONS = 250; // 250 for an fast evaluation, 10000 else + + for (MockedCache cache : cachesForParsing) { + String page = cache.getData(); + String resultOld = parseHint(page, PATTERN_ACTUAL, 1); + String resultNew = parseHint(page, PATTERN_IMPROVED, 1); + assertEquals(resultOld, resultNew); + + long diffOld, diffNew; + + System.out.println("Parsing " + cache.getGeocode() + " " + cache.getName()); + { + System.out.println(("Result actual pattern:\t<<" + resultOld + ">>")); + + long start = System.currentTimeMillis(); + for (int j = 0; j < ITERATIONS; j++) { + parseHint(page, PATTERN_ACTUAL, 1); + } + diffOld = (System.currentTimeMillis() - start); + System.out.println("Time actual pattern:\t" + diffOld + " ms"); + } + + { + System.out.println(("Result new pattern:\t<<" + resultNew + ">>")); + long start = System.currentTimeMillis(); + for (int j = 0; j < ITERATIONS; j++) { + parseHint(page, PATTERN_IMPROVED, 1); + } + diffNew = (System.currentTimeMillis() - start); + System.out.println("Time new pattern:\t" + diffNew + " ms"); + } + Float reduction = new Float((float) diffNew * 100 / (float) diffOld); + System.out.println("Reduction to x percent:\t" + reduction.toString() + "\n"); + } + + } +} diff --git a/tests/src/cgeo/geocaching/test/mock/MockedCache.java b/tests/src/cgeo/geocaching/test/mock/MockedCache.java index 7494028..8c04a18 100644 --- a/tests/src/cgeo/geocaching/test/mock/MockedCache.java +++ b/tests/src/cgeo/geocaching/test/mock/MockedCache.java @@ -1,7 +1,7 @@ package cgeo.geocaching.test.mock; import cgeo.geocaching.ICache; -import cgeo.geocaching.cgBase; +import cgeo.geocaching.utils.BaseUtils; import java.io.BufferedReader; import java.io.IOException; @@ -27,9 +27,10 @@ public abstract class MockedCache implements ICache { buffer.append(line).append("\n"); } + br.close(); - cgBase.replaceWhitespace(buffer); + BaseUtils.replaceWhitespace(buffer); return buffer.toString(); } catch (IOException e) { -- cgit v1.1 From 9c64ecd51b41e1f1626837c1eec43c1f6404b5eb Mon Sep 17 00:00:00 2001 From: blafoo Date: Wed, 21 Sep 2011 22:51:45 +0200 Subject: "Simplified" regex --- main/src/cgeo/geocaching/Constants.java | 44 +++------------------------------ 1 file changed, 3 insertions(+), 41 deletions(-) diff --git a/main/src/cgeo/geocaching/Constants.java b/main/src/cgeo/geocaching/Constants.java index 3c2f74a..3b63ab3 100644 --- a/main/src/cgeo/geocaching/Constants.java +++ b/main/src/cgeo/geocaching/Constants.java @@ -5,49 +5,11 @@ import java.util.regex.Pattern; public class Constants { /** - * For further information about patters have a look at + * For further information about patterns have a look at * http://download.oracle.com/javase/1.4.2/docs/api/java/util/regex/Pattern.html */ - /** Search until the start of the next tag. The tag can follow immediately */ - public static final String NEXT_START_TAG = "[^<]*"; - /** Search until the end of the actual tag. The closing tag can follow immediately */ - public static final String NEXT_END_TAG = "[^>]*"; - - /** Search until the start of the next tag. The tag must not follow immediately */ - public static final String NEXT_START_TAG2 = "[^<]+"; - /** Search until the end of the actual tag. The closing tag must not follow immediately */ - public static final String NEXT_END_TAG2 = "[^>]+"; - - /** P tag */ - public static final String TAG_P_START = "

"; - /** Closing P tag **/ - public static final String TAG_P_END = "

"; - /** Search until the next <p> */ - public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START; - /** Search until the next </p> */ - public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END; - - /** strong tag */ - public static final String TAG_STRONG_START = ""; - /** Closing strong tag */ - public static final String TAG_STRONG_END = ""; - /** Search until the next <strong> */ - public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START; - /** Search until the next </strong> */ - public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END; - - /** div tag */ - public static final String TAG_DIV_START = "
"; - /** closing div tag */ - public static final String TAG_DIV_END = "
"; - /** Search until the next <div> */ - public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START; - /** Search until the next </div> */ - public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END; - - public final static Pattern PATTERN_HINT = Pattern.compile("Additional Hints" + Constants.TAG_STRONG_END + "[^\\(]*\\(Encrypt\\)" + Constants.TAG_P_END + - Constants.NEXT_START_TAG + "
(.*?)" + Constants.TAG_DIV_END + Constants.NEXT_START_TAG + "
(.*?)" + Constants.TAG_DIV_END_NEXT + Constants.TAG_P_START_NEXT + Constants.TAG_P_END_NEXT + Constants.TAG_P_START_NEXT + Constants.TAG_STRONG_START_NEXT + "\\W*Additional Hints" + Constants.TAG_STRONG_END); + public final static Pattern PATTERN_HINT = Pattern.compile("
]*>(.*?)
"); + public final static Pattern PATTERN_DESC = Pattern.compile("(.*?)[^<]*
[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints"); } -- cgit v1.1 From 1b9ebf3d001c8da6550b3ac4677921c16fdb135a Mon Sep 17 00:00:00 2001 From: blafoo Date: Sat, 24 Sep 2011 22:50:11 +0200 Subject: Pimped additional regex --- main/src/cgeo/geocaching/Constants.java | 8 + main/src/cgeo/geocaching/ICache.java | 20 +++ main/src/cgeo/geocaching/cgBase.java | 114 +++----------- main/src/cgeo/geocaching/cgCache.java | 20 +++ main/src/cgeo/geocaching/utils/BaseUtils.java | 28 +++- .../cgeo/geocaching/test/RegExPerformanceTest.java | 175 +++++++++++++++------ .../geocaching/test/RegExRealPerformanceTest.java | 27 ++++ .../cgeo/geocaching/test/cgeoApplicationTest.java | 4 + tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java | 20 +++ tests/src/cgeo/geocaching/test/mock/GC2CJPF.java | 20 +++ 10 files changed, 288 insertions(+), 148 deletions(-) create mode 100644 tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java diff --git a/main/src/cgeo/geocaching/Constants.java b/main/src/cgeo/geocaching/Constants.java index 3b63ab3..d51fe6d 100644 --- a/main/src/cgeo/geocaching/Constants.java +++ b/main/src/cgeo/geocaching/Constants.java @@ -11,5 +11,13 @@ public class Constants { public final static Pattern PATTERN_HINT = Pattern.compile("

]*>(.*?)
"); public final static Pattern PATTERN_DESC = Pattern.compile("(.*?)[^<]*
[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints"); + public final static Pattern PATTERN_SHORTDESC = Pattern.compile("(.*?)[^\\w^<]*

"); + public final static Pattern PATTERN_GEOCODE = Pattern.compile("[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

"); + public final static Pattern PATTERN_LATLON = Pattern.compile("]*>(.*?)"); + public final static Pattern PATTERN_LOCATION = Pattern.compile("In (.*?)"); + public final static Pattern PATTERN_PERSONALNOTE = Pattern.compile("

]*>(.*?)

"); } diff --git a/main/src/cgeo/geocaching/ICache.java b/main/src/cgeo/geocaching/ICache.java index 24dcd7c..89108a2 100644 --- a/main/src/cgeo/geocaching/ICache.java +++ b/main/src/cgeo/geocaching/ICache.java @@ -98,4 +98,24 @@ public interface ICache { */ public String getName(); + /** + * @return Id + */ + public String getCacheId(); + + /** + * @return Guid + */ + public String getGuid(); + + /** + * @return Location + */ + public String getLocation(); + + /** + * @return Personal note + */ + public String getPersonalNote(); + } diff --git a/main/src/cgeo/geocaching/cgBase.java b/main/src/cgeo/geocaching/cgBase.java index f8e0c24..0e85e7d 100644 --- a/main/src/cgeo/geocaching/cgBase.java +++ b/main/src/cgeo/geocaching/cgBase.java @@ -82,13 +82,9 @@ import javax.net.ssl.X509TrustManager; public class cgBase { - private final static Pattern patternGeocode = Pattern.compile("]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternCacheId = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternCacheGuid = Pattern.compile(Pattern.quote("&wid=") + "([0-9a-z\\-]+)" + Pattern.quote("&"), Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternType = Pattern.compile("\"([^\"]+)\"]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternName = Pattern.compile("]*>[^<]*([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternSize = Pattern.compile("
[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternDifficulty = Pattern.compile("]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternTerrain = Pattern.compile("]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternOwner = Pattern.compile("\\W*An?(\\W*Event)?\\W*cache\\W*by[^<]*([^<]+)[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); @@ -99,10 +95,7 @@ public class cgBase { private final static Pattern patternFound = Pattern.compile("

[^<]*]*>[^<]*[^<]*

", Pattern.CASE_INSENSITIVE); private final static Pattern patternFoundAlternative = Pattern.compile("
]*>()?([^<]*)(<\\/b>)?<\\/span>", Pattern.CASE_INSENSITIVE); - private final static Pattern patternLocation = Pattern.compile("]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); - private final static Pattern patternPersonalNote = Pattern.compile("

]*>([^<]*)

", Pattern.CASE_INSENSITIVE); - private final static Pattern patternDescShort = Pattern.compile("
[^<]*]*>((?:(?![^\\w^<]*
).)*)
[^\\w^<]*
", Pattern.CASE_INSENSITIVE); + private final static Pattern patternCountLogs = Pattern.compile("<\\/span>", Pattern.CASE_INSENSITIVE); private final static Pattern patternCountLog = Pattern.compile("src=\"\\/images\\/icons\\/(.+?).gif\"[^>]+> (\\d*[,.]?\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternAttributes = Pattern.compile("

[^<]*]+>\\W*Attributes[^<]*

[^<]*
(([^<]*\"[^\"]+\"[^]*>)+)[^<]* 0) { - cache.geocode = BaseUtils.getMatch(matcherGeocode.group(1)); - } - } catch (Exception e) { - // failed to parse cache geocode - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache geocode"); - } + cache.geocode = BaseUtils.getMatch(page, Constants.PATTERN_GEOCODE, 1, cache.geocode); // cache id - try { - final Matcher matcherCacheId = patternCacheId.matcher(page); - if (matcherCacheId.find() && matcherCacheId.groupCount() > 0) { - cache.cacheId = BaseUtils.getMatch(matcherCacheId.group(1)); - } - } catch (Exception e) { - // failed to parse cache id - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache id"); - } + cache.cacheId = BaseUtils.getMatch(page, Constants.PATTERN_CACHEID, 1, cache.cacheId); // cache guid - try { - final Matcher matcherCacheGuid = patternCacheGuid.matcher(page); - if (matcherCacheGuid.find() && matcherCacheGuid.groupCount() > 0) { - cache.guid = BaseUtils.getMatch(matcherCacheGuid.group(1)); - } - } catch (Exception e) { - // failed to parse cache guid - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache guid"); - } + cache.guid = BaseUtils.getMatch(page, Constants.PATTERN_GUID, 1, cache.guid); // name try { @@ -1235,15 +1204,7 @@ public class cgBase { } // cache size - try { - final Matcher matcherSize = patternSize.matcher(tableInside); - if (matcherSize.find() && matcherSize.groupCount() > 0) { - cache.size = CacheSize.FIND_BY_ID.get(BaseUtils.getMatch(matcherSize.group(1)).toLowerCase()); - } - } catch (Exception e) { - // failed to parse size - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache size"); - } + cache.size = CacheSize.FIND_BY_ID.get(BaseUtils.getMatch(tableInside, Constants.PATTERN_SIZE, 1, CacheSize.NOT_CHOSEN.id).toLowerCase()); } // cache found @@ -1270,35 +1231,20 @@ public class cgBase { } // latitude and logitude - try { - final Matcher matcherLatLon = patternLatLon.matcher(page); - if (matcherLatLon.find() && matcherLatLon.groupCount() > 0) { - cache.latlon = BaseUtils.getMatch(matcherLatLon.group(2)); // first is - - Map tmp = cgBase.parseLatlon(cache.latlon); - if (tmp.size() > 0) { - cache.coords = new Geopoint((Double) tmp.get("latitude"), (Double) tmp.get("longitude")); - cache.latitudeString = (String) tmp.get("latitudeString"); - cache.longitudeString = (String) tmp.get("longitudeString"); - cache.reliableLatLon = true; - } - tmp = null; + cache.latlon = BaseUtils.getMatch(page, Constants.PATTERN_LATLON, 1, cache.latlon); + if (StringUtils.isNotEmpty(cache.latlon)) { + Map tmp = cgBase.parseLatlon(cache.latlon); + if (tmp.size() > 0) { + cache.coords = new Geopoint((Double) tmp.get("latitude"), (Double) tmp.get("longitude")); + cache.latitudeString = (String) tmp.get("latitudeString"); + cache.longitudeString = (String) tmp.get("longitudeString"); + cache.reliableLatLon = true; } - } catch (Exception e) { - // failed to parse latitude and/or longitude - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache coordinates"); + tmp = null; } // cache location - try { - final Matcher matcherLocation = patternLocation.matcher(page); - if (matcherLocation.find() && matcherLocation.groupCount() > 0) { - cache.location = BaseUtils.getMatch(matcherLocation.group(1)); - } - } catch (Exception e) { - // failed to parse location - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache location"); - } + cache.location = BaseUtils.getMatch(page, Constants.PATTERN_LOCATION, 1, cache.location); // cache hint try { @@ -1342,37 +1288,13 @@ public class cgBase { */ // cache personal note - try { - final Matcher matcherPersonalNote = patternPersonalNote.matcher(page); - if (matcherPersonalNote.find() && matcherPersonalNote.groupCount() > 0) { - cache.personalNote = BaseUtils.getMatch(matcherPersonalNote.group(1)); - } - } catch (Exception e) { - // failed to parse cache personal note - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache personal note"); - } + cache.personalNote = BaseUtils.getMatch(page, Constants.PATTERN_PERSONALNOTE, 1, cache.personalNote); // cache short description - try { - final Matcher matcherDescShort = patternDescShort.matcher(page); - if (matcherDescShort.find() && matcherDescShort.groupCount() > 0) { - cache.shortdesc = BaseUtils.getMatch(matcherDescShort.group(1)); - } - } catch (Exception e) { - // failed to parse short description - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache short description"); - } + cache.shortdesc = BaseUtils.getMatch(page, Constants.PATTERN_SHORTDESC, 1, cache.shortdesc); // cache description - try { - final Matcher matcherDesc = Constants.PATTERN_DESC.matcher(page); - if (matcherDesc.find() && matcherDesc.groupCount() > 0) { - cache.description = BaseUtils.getMatch(matcherDesc.group(1)); - } - } catch (Exception e) { - // failed to parse short description - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache description"); - } + cache.description = BaseUtils.getMatch(page, Constants.PATTERN_DESC, 1, cache.description); // cache attributes try { diff --git a/main/src/cgeo/geocaching/cgCache.java b/main/src/cgeo/geocaching/cgCache.java index f44585b..13d7b73 100644 --- a/main/src/cgeo/geocaching/cgCache.java +++ b/main/src/cgeo/geocaching/cgCache.java @@ -458,6 +458,26 @@ public class cgCache implements ICache { return name; } + @Override + public String getCacheId() { + return cacheId; + } + + @Override + public String getGuid() { + return guid; + } + + @Override + public String getLocation() { + return location; + } + + @Override + public String getPersonalNote() { + return personalNote; + } + public boolean supportsUserActions() { return getConnector().supportsUserActions(); } diff --git a/main/src/cgeo/geocaching/utils/BaseUtils.java b/main/src/cgeo/geocaching/utils/BaseUtils.java index 2025765..1ae8337 100644 --- a/main/src/cgeo/geocaching/utils/BaseUtils.java +++ b/main/src/cgeo/geocaching/utils/BaseUtils.java @@ -3,14 +3,36 @@ */ package cgeo.geocaching.utils; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * Misc. utils */ public final class BaseUtils { /** + * Searches for the pattern p in the data for the n-th group. If the pattern + * is not found defaultValue is returned + * + * @param data + * @param p + * @param group + * @param defaultValue + * @return + */ + public static String getMatch(final String data, final Pattern p, final int group, final String defaultValue) { + String result = defaultValue; + final Matcher matcher = p.matcher(data); + if (matcher.find() && matcher.groupCount() >= group) { + result = BaseUtils.makeCopy(matcher.group(group)); + } + return result; + } + + /** * Replace the characters \n, \r and \t with a space - * + * * @param buffer * The data */ @@ -35,13 +57,13 @@ public final class BaseUtils { buffer.append(chars); } - public static String getMatch(String match) { + public static String makeCopy(final String match) { // creating a new String via String constructor is necessary here!! return new String(match.trim()); // Java copies the whole page String, when matching with regular expressions // later this would block the garbage collector, as we only need tiny parts of the page // see http://developer.android.com/reference/java/lang/String.html#backing_array - + // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! } diff --git a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java index 277ec32..59c1d2a 100644 --- a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java +++ b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java @@ -1,6 +1,5 @@ package cgeo.geocaching.test; -import cgeo.geocaching.Constants; import cgeo.geocaching.test.mock.GC1ZXX2; import cgeo.geocaching.test.mock.GC2CJPF; import cgeo.geocaching.test.mock.MockedCache; @@ -8,7 +7,6 @@ import cgeo.geocaching.utils.BaseUtils; import java.util.ArrayList; import java.util.List; -import java.util.regex.Matcher; import java.util.regex.Pattern; import junit.framework.TestCase; @@ -28,77 +26,156 @@ public class RegExPerformanceTest extends TestCase { // "a(.*)a", it's much better to use "a([^a]*)a". // The rewritten expression "" will handle a large, non-matching string almost a hundred times faster then the previous one! - private final static Pattern PATTERN_ACTUAL = Pattern.compile("
]*>(.*?)
", Pattern.CASE_INSENSITIVE); + /** Search until the start of the next tag. The tag can follow immediately */ + public static final String NEXT_START_TAG = "[^<]*"; + /** Search until the end of the actual tag. The closing tag can follow immediately */ + public static final String NEXT_END_TAG = "[^>]*"; - private static final Pattern PATTERN_IMPROVED = Pattern.compile( - "Additional Hints" + Constants.TAG_STRONG_END + - "[^\\(]*\\(Encrypt\\)" + Constants.TAG_P_END + - Constants.NEXT_START_TAG + "
(.*?)" + Constants.TAG_DIV_END + Constants.NEXT_START_TAG + "
= group && matcherHint.group(group) != null) { - // replace linebreak and paragraph tags - String hint = Pattern.compile("<(br|p)" + Constants.NEXT_END_TAG + ">").matcher(matcherHint.group(group)).replaceAll("\n"); - if (hint != null) { - result = hint.replaceAll(Pattern.quote(Constants.TAG_P_END), "").trim(); - } + /** strong tag */ + public static final String TAG_STRONG_START = ""; + /** Closing strong tag */ + public static final String TAG_STRONG_END = ""; + /** Search until the next <strong> */ + public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START; + /** Search until the next </strong> */ + public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END; + + /** div tag */ + public static final String TAG_DIV_START = "
"; + /** closing div tag */ + public static final String TAG_DIV_END = "
"; + /** Search until the next <div> */ + public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START; + /** Search until the next </div> */ + public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END; + + public final static Pattern PATTERN_DESCRIPTION_OLD = Pattern.compile("]*>" + "(.*)[^<]*
[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints", Pattern.CASE_INSENSITIVE); + public final static Pattern PATTERN_DESCRIPTION_NEW = Pattern.compile("(.*?)[^<]*

[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints"); + + public final static Pattern PATTERN_HINT_OLD = Pattern.compile("

]*>(.*?)
", Pattern.CASE_INSENSITIVE); + public final static Pattern PATTERN_HINT_NEW = Pattern.compile("
]*>(.*?)
"); + + public final static Pattern PATTERN_SHORTDESC_OLD = Pattern.compile("
[^<]*]*>((?:(?![^\\w^<]*
).)*)[^\\w^<]*
", Pattern.CASE_INSENSITIVE); + public final static Pattern PATTERN_SHORTDESC_NEW = Pattern.compile("(.*?)[^\\w^<]*
"); + + private final static Pattern PATTERN_GEOCODE_OLD = Pattern.compile("]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_GEOCODE_NEW = Pattern.compile("[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*
[^<]*

", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_SIZE_NEW = Pattern.compile("
[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

"); + + private final static Pattern PATTERN_LATLON_OLD = Pattern.compile("]*>([^<]*)<\\/span>", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_LATLON_NEW = Pattern.compile("]*>(.*?)"); + + private final static Pattern PATTERN_LOCATION_OLD = Pattern.compile("]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_LOCATION_NEW = Pattern.compile("In (.*?)"); + + private final static Pattern PATTERN_PERSONALNOTE_OLD = Pattern.compile("

]*>([^<]*)

", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_PERSONALNOTE_NEW = Pattern.compile("

]*>(.*?)

"); + + + public void testRegEx() { + List output = doTheTests(); + + for (String s : output) { + System.out.println(s); } - return result; } - private String parseDescription(String data, Pattern p, int group) { - String result = null; - final Matcher matcher = p.matcher(data); - if (matcher.find() && matcher.groupCount() >= group) { - result = BaseUtils.getMatch(matcher.group(group)); - } - return result; + public static List doTheTests() { + + int iterations = 1; // set the value to 1000 for real performance measurements + + List output = new ArrayList(); + + output.add("Testing pattern for hint..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_HINT_OLD, PATTERN_HINT_NEW)); + output.add("Testing pattern for description..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION_NEW)); + output.add("Testing pattern for short description..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_SHORTDESC_OLD, PATTERN_SHORTDESC_NEW)); + output.add("Testing pattern for geocode..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_GEOCODE_OLD, PATTERN_GEOCODE_NEW)); + output.add("Testing pattern for cache id..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_CACHEID_OLD, PATTERN_CACHEID_NEW)); + output.add("Testing pattern for cache guid..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_GUID_OLD, PATTERN_GUID_NEW)); + output.add("Testing pattern for size..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_SIZE_OLD, PATTERN_SIZE_NEW)); + output.add("Testing pattern for latlon..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_LATLON_OLD, PATTERN_LATLON_NEW)); + output.add("Testing pattern for location..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_LOCATION_OLD, PATTERN_LOCATION_NEW)); + output.add("Testing pattern for personal note..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_PERSONALNOTE_OLD, PATTERN_PERSONALNOTE_NEW)); + + return output; } + private static List measure(int iterations, Pattern p1, Pattern p2) { - public void testRegEx() { + List output = new ArrayList(); List cachesForParsing = new ArrayList(); cachesForParsing.add(new GC2CJPF()); cachesForParsing.add(new GC1ZXX2()); - int ITERATIONS = 250; // 250 for an fast evaluation, 10000 else - for (MockedCache cache : cachesForParsing) { String page = cache.getData(); - String resultOld = parseHint(page, PATTERN_ACTUAL, 1); - String resultNew = parseHint(page, PATTERN_IMPROVED, 1); - assertEquals(resultOld, resultNew); + String result1 = BaseUtils.getMatch(page, p1, 1, ""); + String result2 = BaseUtils.getMatch(page, p2, 1, ""); + assertEquals(result1, result2); - long diffOld, diffNew; + long diff1, diff2; - System.out.println("Parsing " + cache.getGeocode() + " " + cache.getName()); + output.add("Parsing " + cache.getGeocode() + " " + cache.getName()); { - System.out.println(("Result actual pattern:\t<<" + resultOld + ">>")); - - long start = System.currentTimeMillis(); - for (int j = 0; j < ITERATIONS; j++) { - parseHint(page, PATTERN_ACTUAL, 1); - } - diffOld = (System.currentTimeMillis() - start); - System.out.println("Time actual pattern:\t" + diffOld + " ms"); + output.add("Result pattern #1:\t<<" + result1 + ">>"); + diff1 = parse(page, p1, iterations); + output.add("Time pattern #1:\t" + diff1 + " ms"); } { - System.out.println(("Result new pattern:\t<<" + resultNew + ">>")); - long start = System.currentTimeMillis(); - for (int j = 0; j < ITERATIONS; j++) { - parseHint(page, PATTERN_IMPROVED, 1); - } - diffNew = (System.currentTimeMillis() - start); - System.out.println("Time new pattern:\t" + diffNew + " ms"); + output.add("Result pattern #2:\t<<" + result2 + ">>"); + diff2 = parse(page, p2, iterations); + output.add("Time pattern #2:\t" + diff2 + " ms"); } - Float reduction = new Float((float) diffNew * 100 / (float) diffOld); - System.out.println("Reduction to x percent:\t" + reduction.toString() + "\n"); + Float reduction = new Float((float) diff2 * 100 / (float) diff1); + output.add("Reduction to x percent:\t" + reduction.toString() + "\n"); } + return output; + + } + + private static long parse(String page, Pattern pattern, int iterations) + { + long start = System.currentTimeMillis(); + for (int j = 0; j < iterations; j++) { + BaseUtils.getMatch(page, pattern, 1, ""); + } + return (System.currentTimeMillis() - start); + } + } diff --git a/tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java new file mode 100644 index 0000000..90a7d37 --- /dev/null +++ b/tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java @@ -0,0 +1,27 @@ +package cgeo.geocaching.test; + +import cgeo.geocaching.cgSettings; + +import android.test.AndroidTestCase; +import android.util.Log; + +import java.util.List; + +/** + * Test class to compare the performance of two regular expressions on given data. + * Can be used to improve the time needed to parse the cache data + * + * @author blafoo + */ +public class RegExRealPerformanceTest extends AndroidTestCase { + + public void testRegEx() { + + List output = RegExPerformanceTest.doTheTests(); + + for (String s : output) { + Log.w(cgSettings.tag, s); + } + + } +} diff --git a/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java b/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java index d7140ab..0a3cdae 100644 --- a/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java +++ b/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java @@ -106,6 +106,10 @@ public class cgeoApplicationTest extends ApplicationTestCase { Assert.assertTrue(cacheParsed.getDescription().startsWith(cache.getDescription())); Assert.assertEquals(cache.getShortDescription(), cacheParsed.getShortDescription()); Assert.assertEquals(cache.getName(), cacheParsed.getName()); + Assert.assertEquals(cache.getCacheId(), cacheParsed.getCacheId()); + Assert.assertEquals(cache.getGuid(), cacheParsed.getGuid()); + Assert.assertEquals(cache.getLocation(), cacheParsed.getLocation()); + Assert.assertEquals(cache.getPersonalNote(), cacheParsed.getPersonalNote()); } } diff --git a/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java b/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java index 9cd653a..e150f29 100644 --- a/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java +++ b/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java @@ -90,4 +90,24 @@ public class GC1ZXX2 extends MockedCache { return "Hannopoly: Eislisenstrasse "; } + @Override + public String getCacheId() { + return "1433909"; + } + + @Override + public String getGuid() { + return "36d45871-b99d-46d6-95fc-ff86ab564c98"; + } + + @Override + public String getLocation() { + return "Niedersachsen, Germany"; + } + + @Override + public String getPersonalNote() { + return ""; + } + } diff --git a/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java b/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java index 8029f93..3cd602a 100644 --- a/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java +++ b/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java @@ -90,4 +90,24 @@ public class GC2CJPF extends MockedCache { return "Kinderwald KiC"; } + @Override + public String getCacheId() { + return "1811409"; + } + + @Override + public String getGuid() { + return "73246a5a-ebb9-4d4f-8db9-a951036f5376"; + } + + @Override + public String getLocation() { + return "Niedersachsen, Germany"; + } + + @Override + public String getPersonalNote() { + return ""; + } + } -- cgit v1.1 From 426ed1d6bd4336a8ca5dc3e3c05b78abfa2c6504 Mon Sep 17 00:00:00 2001 From: blafoo Date: Sun, 25 Sep 2011 14:41:43 +0200 Subject: Pimped additional regex --- main/src/cgeo/geocaching/Constants.java | 8 ++- main/src/cgeo/geocaching/cgBase.java | 55 ++++--------------- main/src/cgeo/geocaching/utils/BaseUtils.java | 23 ++++---- .../cgeo/geocaching/test/RegExPerformanceTest.java | 62 ++++++++++++++-------- tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java | 2 +- 5 files changed, 69 insertions(+), 81 deletions(-) diff --git a/main/src/cgeo/geocaching/Constants.java b/main/src/cgeo/geocaching/Constants.java index d51fe6d..e3cc293 100644 --- a/main/src/cgeo/geocaching/Constants.java +++ b/main/src/cgeo/geocaching/Constants.java @@ -2,7 +2,7 @@ package cgeo.geocaching; import java.util.regex.Pattern; -public class Constants { +public final class Constants { /** * For further information about patterns have a look at @@ -19,5 +19,11 @@ public class Constants { public final static Pattern PATTERN_LATLON = Pattern.compile("]*>(.*?)"); public final static Pattern PATTERN_LOCATION = Pattern.compile("In (.*?)"); public final static Pattern PATTERN_PERSONALNOTE = Pattern.compile("

]*>(.*?)

"); + public final static Pattern PATTERN_NAME = Pattern.compile("(.*?)"); + public final static Pattern PATTERN_DIFFICULTY = Pattern.compile("]*>[^<]*\"");]*>[^<]*\"");]*>[^<]*[^<]*

"); + public final static Pattern PATTERN_FOUND_ALTERNATIVE = Pattern.compile("
]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - - private final static Pattern patternName = Pattern.compile("]*>[^<]*([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternDifficulty = Pattern.compile("]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternTerrain = Pattern.compile("]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternOwner = Pattern.compile("\\W*An?(\\W*Event)?\\W*cache\\W*by[^<]*([^<]+)[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternOwnerReal = Pattern.compile("[^<]+", Pattern.CASE_INSENSITIVE); private final static Pattern patternHidden = Pattern.compile("]*>\\W*Hidden[\\s:]*([^<]+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternHiddenEvent = Pattern.compile("]*>\\W*Event\\W*Date[^:]*:([^<]*)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternFavourite = Pattern.compile("]*>[^<]*[^\\d]*([0-9]+)[^\\d^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - - private final static Pattern patternFound = Pattern.compile("

[^<]*]*>[^<]*[^<]*

", Pattern.CASE_INSENSITIVE); - private final static Pattern patternFoundAlternative = Pattern.compile("
<\\/span>", Pattern.CASE_INSENSITIVE); private final static Pattern patternCountLog = Pattern.compile("src=\"\\/images\\/icons\\/(.+?).gif\"[^>]+> (\\d*[,.]?\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternAttributes = Pattern.compile("

[^<]*]+>\\W*Attributes[^<]*

[^<]*
(([^<]*\"[^\"]+\"[^]*>)+)[^<]* 0) { - cache.name = Html.fromHtml(matcherName.group(1)).toString(); - } - } catch (Exception e) { - // failed to parse cache name - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache name"); - } + cache.name = Html.fromHtml(BaseUtils.getMatch(page, Constants.PATTERN_NAME, 1, cache.name)).toString(); // owner real name - try { - final Matcher matcherOwnerReal = patternOwnerReal.matcher(page); - if (matcherOwnerReal.find() && matcherOwnerReal.groupCount() > 0) { - cache.ownerReal = URLDecoder.decode(matcherOwnerReal.group(1)); - } - } catch (Exception e) { - // failed to parse owner real name - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache owner real name"); - } + // TODO URLDecoder.decode ?? + cache.ownerReal = BaseUtils.getMatch(page, Constants.PATTERN_OWNERREAL, 1, cache.ownerReal); final String username = settings.getUsername(); if (cache.ownerReal != null && username != null && cache.ownerReal.equalsIgnoreCase(username)) { @@ -1136,25 +1111,15 @@ public class cgBase { if (StringUtils.isNotBlank(tableInside)) { // cache terrain - try { - final Matcher matcherTerrain = patternTerrain.matcher(tableInside); - if (matcherTerrain.find() && matcherTerrain.groupCount() > 0) { - cache.terrain = new Float(Pattern.compile("_").matcher(matcherTerrain.group(1)).replaceAll(".")); - } - } catch (Exception e) { - // failed to parse terrain - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache terrain"); + String result = BaseUtils.getMatch(tableInside, Constants.PATTERN_TERRAIN, 1, null); + if (result != null) { + cache.terrain = new Float(Pattern.compile("_").matcher(result).replaceAll(".")); } // cache difficulty - try { - final Matcher matcherDifficulty = patternDifficulty.matcher(tableInside); - if (matcherDifficulty.find() && matcherDifficulty.groupCount() > 0) { - cache.difficulty = new Float(Pattern.compile("_").matcher(matcherDifficulty.group(1)).replaceAll(".")); - } - } catch (Exception e) { - // failed to parse difficulty - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache difficulty"); + result = BaseUtils.getMatch(tableInside, Constants.PATTERN_DIFFICULTY, 1, null); + if (result != null) { + cache.difficulty = new Float(Pattern.compile("_").matcher(result).replaceAll(".")); } // owner @@ -1208,7 +1173,7 @@ public class cgBase { } // cache found - cache.found = patternFound.matcher(page).find() || patternFoundAlternative.matcher(page).find(); + cache.found = Constants.PATTERN_FOUND.matcher(page).find() || Constants.PATTERN_FOUND_ALTERNATIVE.matcher(page).find(); // cache type try { diff --git a/main/src/cgeo/geocaching/utils/BaseUtils.java b/main/src/cgeo/geocaching/utils/BaseUtils.java index 1ae8337..5f4833b 100644 --- a/main/src/cgeo/geocaching/utils/BaseUtils.java +++ b/main/src/cgeo/geocaching/utils/BaseUtils.java @@ -22,16 +22,23 @@ public final class BaseUtils { * @return */ public static String getMatch(final String data, final Pattern p, final int group, final String defaultValue) { - String result = defaultValue; final Matcher matcher = p.matcher(data); if (matcher.find() && matcher.groupCount() >= group) { - result = BaseUtils.makeCopy(matcher.group(group)); + // creating a new String via String constructor is necessary here!! + return new String(matcher.group(group).trim()); + // Java copies the whole page String, when matching with regular expressions + // later this would block the garbage collector, as we only need tiny parts of the page + // see http://developer.android.com/reference/java/lang/String.html#backing_array + + // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! } - return result; + return defaultValue; } /** * Replace the characters \n, \r and \t with a space + * The result is a very long single "line". + * Don't change this behavior - the patterns for parsing rely on this matter of fact ! * * @param buffer * The data @@ -57,14 +64,4 @@ public final class BaseUtils { buffer.append(chars); } - public static String makeCopy(final String match) { - // creating a new String via String constructor is necessary here!! - return new String(match.trim()); - // Java copies the whole page String, when matching with regular expressions - // later this would block the garbage collector, as we only need tiny parts of the page - // see http://developer.android.com/reference/java/lang/String.html#backing_array - - // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! - } - } diff --git a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java index 59c1d2a..e77d021 100644 --- a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java +++ b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java @@ -64,34 +64,46 @@ public class RegExPerformanceTest extends TestCase { public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END; public final static Pattern PATTERN_DESCRIPTION_OLD = Pattern.compile("]*>" + "(.*)[^<]*
[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints", Pattern.CASE_INSENSITIVE); - public final static Pattern PATTERN_DESCRIPTION_NEW = Pattern.compile("(.*?)[^<]*

[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints"); + public final static Pattern PATTERN_DESCRIPTION = Pattern.compile("(.*?)[^<]*

[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints"); public final static Pattern PATTERN_HINT_OLD = Pattern.compile("

]*>(.*?)
", Pattern.CASE_INSENSITIVE); - public final static Pattern PATTERN_HINT_NEW = Pattern.compile("
]*>(.*?)
"); + public final static Pattern PATTERN_HINT = Pattern.compile("
]*>(.*?)
"); public final static Pattern PATTERN_SHORTDESC_OLD = Pattern.compile("
[^<]*]*>((?:(?![^\\w^<]*
).)*)
[^\\w^<]*
", Pattern.CASE_INSENSITIVE); - public final static Pattern PATTERN_SHORTDESC_NEW = Pattern.compile("(.*?)[^\\w^<]*
"); + public final static Pattern PATTERN_SHORTDESC = Pattern.compile("(.*?)[^\\w^<]*
"); private final static Pattern PATTERN_GEOCODE_OLD = Pattern.compile("]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern PATTERN_GEOCODE_NEW = Pattern.compile("[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern PATTERN_SIZE_NEW = Pattern.compile("
[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

"); + private final static Pattern PATTERN_SIZE = Pattern.compile("
[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

"); private final static Pattern PATTERN_LATLON_OLD = Pattern.compile("]*>([^<]*)<\\/span>", Pattern.CASE_INSENSITIVE); - private final static Pattern PATTERN_LATLON_NEW = Pattern.compile("]*>(.*?)"); + private final static Pattern PATTERN_LATLON = Pattern.compile("]*>(.*?)"); private final static Pattern PATTERN_LOCATION_OLD = Pattern.compile("]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); - private final static Pattern PATTERN_LOCATION_NEW = Pattern.compile("In (.*?)"); + private final static Pattern PATTERN_LOCATION = Pattern.compile("In (.*?)"); private final static Pattern PATTERN_PERSONALNOTE_OLD = Pattern.compile("

]*>([^<]*)

", Pattern.CASE_INSENSITIVE); - private final static Pattern PATTERN_PERSONALNOTE_NEW = Pattern.compile("

]*>(.*?)

"); + private final static Pattern PATTERN_PERSONALNOTE = Pattern.compile("

]*>(.*?)

"); + + private final static Pattern PATTERN_NAME_OLD = Pattern.compile("]*>[^<]*([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_NAME = Pattern.compile("(.*?)"); + + private final static Pattern PATTERN_DIFFICULTY_OLD = Pattern.compile("]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_DIFFICULTY = Pattern.compile("]*>[^<]*\"");]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_TERRAIN = Pattern.compile("]*>[^<]*\"");[^<]+", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_OWNERREAL = Pattern.compile(" doTheTests() { - int iterations = 1; // set the value to 1000 for real performance measurements + int iterations = 250; // set the value to 1000 for real performance measurements List output = new ArrayList(); output.add("Testing pattern for hint..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_HINT_OLD, PATTERN_HINT_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_HINT_OLD, PATTERN_HINT)); output.add("Testing pattern for description..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION)); output.add("Testing pattern for short description..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_SHORTDESC_OLD, PATTERN_SHORTDESC_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_SHORTDESC_OLD, PATTERN_SHORTDESC)); output.add("Testing pattern for geocode..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_GEOCODE_OLD, PATTERN_GEOCODE_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_GEOCODE_OLD, PATTERN_GEOCODE)); output.add("Testing pattern for cache id..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_CACHEID_OLD, PATTERN_CACHEID_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_CACHEID_OLD, PATTERN_CACHEID)); output.add("Testing pattern for cache guid..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_GUID_OLD, PATTERN_GUID_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_GUID_OLD, PATTERN_GUID)); output.add("Testing pattern for size..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_SIZE_OLD, PATTERN_SIZE_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_SIZE_OLD, PATTERN_SIZE)); output.add("Testing pattern for latlon..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_LATLON_OLD, PATTERN_LATLON_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_LATLON_OLD, PATTERN_LATLON)); output.add("Testing pattern for location..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_LOCATION_OLD, PATTERN_LOCATION_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_LOCATION_OLD, PATTERN_LOCATION)); output.add("Testing pattern for personal note..."); - output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_PERSONALNOTE_OLD, PATTERN_PERSONALNOTE_NEW)); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_PERSONALNOTE_OLD, PATTERN_PERSONALNOTE)); + output.add("Testing pattern for name..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_NAME_OLD, PATTERN_NAME)); + output.add("Testing pattern for difficulty..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_DIFFICULTY_OLD, PATTERN_DIFFICULTY)); + output.add("Testing pattern for terrain..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_TERRAIN_OLD, PATTERN_TERRAIN)); + output.add("Testing pattern for owner real..."); + output.addAll(RegExPerformanceTest.measure(iterations, PATTERN_OWNERREAL_OLD, PATTERN_OWNERREAL)); return output; } diff --git a/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java b/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java index e150f29..76a6e92 100644 --- a/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java +++ b/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java @@ -87,7 +87,7 @@ public class GC1ZXX2 extends MockedCache { @Override public String getName() { - return "Hannopoly: Eislisenstrasse "; + return "Hannopoly: Eislisenstrasse"; } @Override -- cgit v1.1 From 9393d3815bc9211091b3eef5f1c79c4c976fdc7b Mon Sep 17 00:00:00 2001 From: blafoo Date: Sun, 25 Sep 2011 14:44:06 +0200 Subject: Pimped additional regex --- main/src/cgeo/geocaching/cgBase.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/main/src/cgeo/geocaching/cgBase.java b/main/src/cgeo/geocaching/cgBase.java index 11caf4a..4dd494f 100644 --- a/main/src/cgeo/geocaching/cgBase.java +++ b/main/src/cgeo/geocaching/cgBase.java @@ -49,6 +49,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; +import java.net.URLDecoder; import java.net.URLEncoder; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; @@ -1082,8 +1083,8 @@ public class cgBase { cache.name = Html.fromHtml(BaseUtils.getMatch(page, Constants.PATTERN_NAME, 1, cache.name)).toString(); // owner real name - // TODO URLDecoder.decode ?? - cache.ownerReal = BaseUtils.getMatch(page, Constants.PATTERN_OWNERREAL, 1, cache.ownerReal); + // URLDecoder.decode() neccessary here ? + cache.ownerReal = URLDecoder.decode(BaseUtils.getMatch(page, Constants.PATTERN_OWNERREAL, 1, cache.ownerReal)); final String username = settings.getUsername(); if (cache.ownerReal != null && username != null && cache.ownerReal.equalsIgnoreCase(username)) { @@ -4454,7 +4455,7 @@ public class cgBase { /** * Generate a numeric date and time string according to system-wide settings (locale, * date format) such as "7 sept. à 12:35". - * + * * @param context * a Context * @param date -- cgit v1.1