diff options
-rw-r--r--[-rwxr-xr-x] | main/project/localization/findmissingtranslations.sh | 0 | ||||
-rw-r--r--[-rwxr-xr-x] | main/res/values-sv/strings.xml | 0 | ||||
-rw-r--r-- | main/src/cgeo/geocaching/Constants.java | 29 | ||||
-rw-r--r-- | main/src/cgeo/geocaching/ICache.java | 20 | ||||
-rw-r--r-- | main/src/cgeo/geocaching/cgBase.java | 174 | ||||
-rw-r--r-- | main/src/cgeo/geocaching/cgCache.java | 20 | ||||
-rw-r--r-- | main/src/cgeo/geocaching/utils/BaseUtils.java | 67 | ||||
-rw-r--r-- | tests/src/cgeo/geocaching/test/RegExPerformanceTest.java | 183 | ||||
-rw-r--r-- | tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java | 27 | ||||
-rw-r--r-- | tests/src/cgeo/geocaching/test/cgeoApplicationTest.java | 4 | ||||
-rw-r--r-- | tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java | 22 | ||||
-rw-r--r-- | tests/src/cgeo/geocaching/test/mock/GC2CJPF.java | 20 | ||||
-rw-r--r-- | tests/src/cgeo/geocaching/test/mock/MockedCache.java | 1 |
13 files changed, 418 insertions, 149 deletions
diff --git a/main/project/localization/findmissingtranslations.sh b/main/project/localization/findmissingtranslations.sh index f91228e..f91228e 100755..100644 --- a/main/project/localization/findmissingtranslations.sh +++ b/main/project/localization/findmissingtranslations.sh diff --git a/main/res/values-sv/strings.xml b/main/res/values-sv/strings.xml index 5c5052f..5c5052f 100755..100644 --- a/main/res/values-sv/strings.xml +++ b/main/res/values-sv/strings.xml diff --git a/main/src/cgeo/geocaching/Constants.java b/main/src/cgeo/geocaching/Constants.java new file mode 100644 index 0000000..e3cc293 --- /dev/null +++ b/main/src/cgeo/geocaching/Constants.java @@ -0,0 +1,29 @@ +package cgeo.geocaching; + +import java.util.regex.Pattern; + +public final class Constants { + + /** + * For further information about patterns have a look at + * http://download.oracle.com/javase/1.4.2/docs/api/java/util/regex/Pattern.html + */ + + public final static Pattern PATTERN_HINT = Pattern.compile("<div id=\"div_hint\"[^>]*>(.*?)</div>"); + public final static Pattern PATTERN_DESC = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\">(.*?)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>"); + public final static Pattern PATTERN_SHORTDESC = Pattern.compile("<span id=\"ctl00_ContentBody_ShortDescription\">(.*?)</span>[^\\w^<]*</div>"); + public final static Pattern PATTERN_GEOCODE = Pattern.compile("<meta name=\"og:url\" content=\"[^\"]+/(GC[0-9A-Z]+)\""); + public final static Pattern PATTERN_CACHEID = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)"); + public final static Pattern PATTERN_GUID = Pattern.compile(Pattern.quote("&wid=") + "([0-9a-z\\-]+)" + Pattern.quote("&")); + public final static Pattern PATTERN_SIZE = Pattern.compile("<div class=\"CacheSize[^\"]*\">[^<]*<p[^>]*>[^S]*Size[^:]*:[^<]*<span[^>]*>[^<]*<img src=\"[^\"]*/icons/container/[a-z_]+\\.gif\" alt=\"Size: ([^\"]+)\"[^>]*>[^<]*<small>[^<]*</small>[^<]*</span>[^<]*</p>"); + public final static Pattern PATTERN_LATLON = Pattern.compile("<span id=\"ctl00_ContentBody_LatLon\"[^>]*>(.*?)</span>"); + public final static Pattern PATTERN_LOCATION = Pattern.compile("<span id=\"ctl00_ContentBody_Location\">In (.*?)</span>"); + public final static Pattern PATTERN_PERSONALNOTE = Pattern.compile("<p id=\"cache_note\"[^>]*>(.*?)</p>"); + public final static Pattern PATTERN_NAME = Pattern.compile("<span id=\"ctl00_ContentBody_CacheName\">(.*?)</span>"); + public final static Pattern PATTERN_DIFFICULTY = Pattern.compile("<span id=\"ctl00_ContentBody_uxLegendScale\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\""); + public final static Pattern PATTERN_TERRAIN = Pattern.compile("<span id=\"ctl00_ContentBody_Localize6\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\""); + public final static Pattern PATTERN_OWNERREAL = Pattern.compile("<a id=\"ctl00_ContentBody_uxFindLinksHiddenByThisUser\" href=\"[^\"]*/seek/nearest\\.aspx\\?u=(.*?)\""); + public final static Pattern PATTERN_FOUND = Pattern.compile("<a id=\"ctl00_ContentBody_hlFoundItLog\"[^<]*<img src=\".*/images/stockholm/16x16/check\\.gif\"[^>]*>[^<]*</a>[^<]*</p>"); + public final static Pattern PATTERN_FOUND_ALTERNATIVE = Pattern.compile("<div class=\"StatusInformationWidget FavoriteWidget\""); + +} diff --git a/main/src/cgeo/geocaching/ICache.java b/main/src/cgeo/geocaching/ICache.java index 24dcd7c..89108a2 100644 --- a/main/src/cgeo/geocaching/ICache.java +++ b/main/src/cgeo/geocaching/ICache.java @@ -98,4 +98,24 @@ public interface ICache { */ public String getName(); + /** + * @return Id + */ + public String getCacheId(); + + /** + * @return Guid + */ + public String getGuid(); + + /** + * @return Location + */ + public String getLocation(); + + /** + * @return Personal note + */ + public String getPersonalNote(); + } diff --git a/main/src/cgeo/geocaching/cgBase.java b/main/src/cgeo/geocaching/cgBase.java index 457a616..2e66edb 100644 --- a/main/src/cgeo/geocaching/cgBase.java +++ b/main/src/cgeo/geocaching/cgBase.java @@ -9,6 +9,7 @@ import cgeo.geocaching.enumerations.WaypointType; import cgeo.geocaching.files.LocParser; import cgeo.geocaching.geopoint.DistanceParser; import cgeo.geocaching.geopoint.Geopoint; +import cgeo.geocaching.utils.BaseUtils; import cgeo.geocaching.utils.CollectionUtils; import org.apache.commons.lang3.ArrayUtils; @@ -93,29 +94,11 @@ import javax.net.ssl.X509TrustManager; public class cgBase { - private final static Pattern patternGeocode = Pattern.compile("<meta name=\"og:url\" content=\"[^\"]+/(GC[0-9A-Z]+)\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternCacheId = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternCacheGuid = Pattern.compile(Pattern.quote("&wid=") + "([0-9a-z\\-]+)" + Pattern.quote("&"), Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternType = Pattern.compile("<img src=\"[^\"]*/WptTypes/\\d+\\.gif\" alt=\"([^\"]+)\" (title=\"[^\"]*\" )?width=\"32\" height=\"32\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - - private final static Pattern patternName = Pattern.compile("<h2[^>]*>[^<]*<span id=\"ctl00_ContentBody_CacheName\">([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternSize = Pattern.compile("<div class=\"CacheSize[^\"]*\">[^<]*<p[^>]*>[^S]*Size[^:]*:[^<]*<span[^>]*>[^<]*<img src=\"[^\"]*/icons/container/[a-z_]+\\.gif\" alt=\"Size: ([^\"]+)\"[^>]*>[^<]*<small>[^<]*</small>[^<]*</span>[^<]*</p>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternDifficulty = Pattern.compile("<span id=\"ctl00_ContentBody_uxLegendScale\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternTerrain = Pattern.compile("<span id=\"ctl00_ContentBody_Localize6\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternOwner = Pattern.compile("<span class=\"minorCacheDetails\">\\W*An?(\\W*Event)?\\W*cache\\W*by[^<]*<a href=\"[^\"]+\">([^<]+)</a>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - private final static Pattern patternOwnerReal = Pattern.compile("<a id=\"ctl00_ContentBody_uxFindLinksHiddenByThisUser\" href=\"[^\"]*/seek/nearest\\.aspx\\?u=*([^\"]+)\">[^<]+</a>", Pattern.CASE_INSENSITIVE); private final static Pattern patternHidden = Pattern.compile("<span[^>]*>\\W*Hidden[\\s:]*([^<]+)</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternHiddenEvent = Pattern.compile("<span[^>]*>\\W*Event\\W*Date[^:]*:([^<]*)</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternFavourite = Pattern.compile("<a id=\"uxFavContainerLink\"[^>]*>[^<]*<div[^<]*<span class=\"favorite-value\">[^\\d]*([0-9]+)[^\\d^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); - - private final static Pattern patternFound = Pattern.compile("<p>[^<]*<a id=\"ctl00_ContentBody_hlFoundItLog\"[^<]*<img src=\".*/images/stockholm/16x16/check\\.gif\"[^>]*>[^<]*</a>[^<]*</p>", Pattern.CASE_INSENSITIVE); - private final static Pattern patternFoundAlternative = Pattern.compile("<div class=\"StatusInformationWidget FavoriteWidget\"", Pattern.CASE_INSENSITIVE); - private final static Pattern patternLatLon = Pattern.compile("<span id=\"ctl00_ContentBody_LatLon\"[^>]*>(<b>)?([^<]*)(<\\/b>)?<\\/span>", Pattern.CASE_INSENSITIVE); - private final static Pattern patternLocation = Pattern.compile("<span id=\"ctl00_ContentBody_Location\"[^>]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); - private final static Pattern patternHint = Pattern.compile("<div id=\"div_hint\"[^>]*>(.*?)</div>", Pattern.CASE_INSENSITIVE); - private final static Pattern patternPersonalNote = Pattern.compile("<p id=\"cache_note\"[^>]*>([^<]*)</p>", Pattern.CASE_INSENSITIVE); - private final static Pattern patternDescShort = Pattern.compile("<div class=\"UserSuppliedContent\">[^<]*<span id=\"ctl00_ContentBody_ShortDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^\\w^<]*</div>", Pattern.CASE_INSENSITIVE); - private final static Pattern patternDesc = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\"[^>]*>" + "(.*)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>", Pattern.CASE_INSENSITIVE); private final static Pattern patternCountLogs = Pattern.compile("<span id=\"ctl00_ContentBody_lblFindCounts\"><p(.+?)<\\/p><\\/span>", Pattern.CASE_INSENSITIVE); private final static Pattern patternCountLog = Pattern.compile("src=\"\\/images\\/icons\\/(.+?).gif\"[^>]+> (\\d*[,.]?\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern patternAttributes = Pattern.compile("<h3 class=\"WidgetHeader\">[^<]*<img[^>]+>\\W*Attributes[^<]*</h3>[^<]*<div class=\"WidgetBody\">(([^<]*<img src=\"[^\"]+\" alt=\"[^\"]+\"[^>]*>)+)[^<]*<p", Pattern.CASE_INSENSITIVE); @@ -1092,59 +1075,20 @@ public class cgBase { cache.reason = reason; // cache geocode - try { - final Matcher matcherGeocode = patternGeocode.matcher(page); - if (matcherGeocode.find() && matcherGeocode.groupCount() > 0) { - cache.geocode = getMatch(matcherGeocode.group(1)); - } - } catch (Exception e) { - // failed to parse cache geocode - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache geocode"); - } + cache.geocode = BaseUtils.getMatch(page, Constants.PATTERN_GEOCODE, 1, cache.geocode); // cache id - try { - final Matcher matcherCacheId = patternCacheId.matcher(page); - if (matcherCacheId.find() && matcherCacheId.groupCount() > 0) { - cache.cacheId = getMatch(matcherCacheId.group(1)); - } - } catch (Exception e) { - // failed to parse cache id - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache id"); - } + cache.cacheId = BaseUtils.getMatch(page, Constants.PATTERN_CACHEID, 1, cache.cacheId); // cache guid - try { - final Matcher matcherCacheGuid = patternCacheGuid.matcher(page); - if (matcherCacheGuid.find() && matcherCacheGuid.groupCount() > 0) { - cache.guid = getMatch(matcherCacheGuid.group(1)); - } - } catch (Exception e) { - // failed to parse cache guid - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache guid"); - } + cache.guid = BaseUtils.getMatch(page, Constants.PATTERN_GUID, 1, cache.guid); // name - try { - final Matcher matcherName = patternName.matcher(page); - if (matcherName.find() && matcherName.groupCount() > 0) { - cache.name = Html.fromHtml(matcherName.group(1)).toString(); - } - } catch (Exception e) { - // failed to parse cache name - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache name"); - } + cache.name = Html.fromHtml(BaseUtils.getMatch(page, Constants.PATTERN_NAME, 1, cache.name)).toString(); // owner real name - try { - final Matcher matcherOwnerReal = patternOwnerReal.matcher(page); - if (matcherOwnerReal.find() && matcherOwnerReal.groupCount() > 0) { - cache.ownerReal = URLDecoder.decode(matcherOwnerReal.group(1)); - } - } catch (Exception e) { - // failed to parse owner real name - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache owner real name"); - } + // URLDecoder.decode() neccessary here ? + cache.ownerReal = URLDecoder.decode(BaseUtils.getMatch(page, Constants.PATTERN_OWNERREAL, 1, cache.ownerReal)); final String username = settings.getUsername(); if (cache.ownerReal != null && username != null && cache.ownerReal.equalsIgnoreCase(username)) { @@ -1172,25 +1116,15 @@ public class cgBase { if (StringUtils.isNotBlank(tableInside)) { // cache terrain - try { - final Matcher matcherTerrain = patternTerrain.matcher(tableInside); - if (matcherTerrain.find() && matcherTerrain.groupCount() > 0) { - cache.terrain = new Float(Pattern.compile("_").matcher(matcherTerrain.group(1)).replaceAll(".")); - } - } catch (Exception e) { - // failed to parse terrain - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache terrain"); + String result = BaseUtils.getMatch(tableInside, Constants.PATTERN_TERRAIN, 1, null); + if (result != null) { + cache.terrain = new Float(Pattern.compile("_").matcher(result).replaceAll(".")); } // cache difficulty - try { - final Matcher matcherDifficulty = patternDifficulty.matcher(tableInside); - if (matcherDifficulty.find() && matcherDifficulty.groupCount() > 0) { - cache.difficulty = new Float(Pattern.compile("_").matcher(matcherDifficulty.group(1)).replaceAll(".")); - } - } catch (Exception e) { - // failed to parse difficulty - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache difficulty"); + result = BaseUtils.getMatch(tableInside, Constants.PATTERN_DIFFICULTY, 1, null); + if (result != null) { + cache.difficulty = new Float(Pattern.compile("_").matcher(result).replaceAll(".")); } // owner @@ -1240,19 +1174,11 @@ public class cgBase { } // cache size - try { - final Matcher matcherSize = patternSize.matcher(tableInside); - if (matcherSize.find() && matcherSize.groupCount() > 0) { - cache.size = CacheSize.FIND_BY_ID.get(getMatch(matcherSize.group(1)).toLowerCase()); - } - } catch (Exception e) { - // failed to parse size - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache size"); - } + cache.size = CacheSize.FIND_BY_ID.get(BaseUtils.getMatch(tableInside, Constants.PATTERN_SIZE, 1, CacheSize.NOT_CHOSEN.id).toLowerCase()); } // cache found - cache.found = patternFound.matcher(page).find() || patternFoundAlternative.matcher(page).find(); + cache.found = Constants.PATTERN_FOUND.matcher(page).find() || Constants.PATTERN_FOUND_ALTERNATIVE.matcher(page).find(); // cache type try { @@ -1275,32 +1201,18 @@ public class cgBase { } // latitude and logitude - try { - final Matcher matcherLatLon = patternLatLon.matcher(page); - if (matcherLatLon.find() && matcherLatLon.groupCount() > 0) { - cache.latlon = getMatch(matcherLatLon.group(2)); // first is <b> - cache.coords = new Geopoint(cache.latlon); - cache.reliableLatLon = true; - } - } catch (Exception e) { - // failed to parse latitude and/or longitude - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache coordinates"); + cache.latlon = BaseUtils.getMatch(page, Constants.PATTERN_LATLON, 1, cache.latlon); + if (StringUtils.isNotEmpty(cache.latlon)) { + cache.coords = new Geopoint(cache.latlon); + cache.reliableLatLon = true; } // cache location - try { - final Matcher matcherLocation = patternLocation.matcher(page); - if (matcherLocation.find() && matcherLocation.groupCount() > 0) { - cache.location = getMatch(matcherLocation.group(1)); - } - } catch (Exception e) { - // failed to parse location - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache location"); - } + cache.location = BaseUtils.getMatch(page, Constants.PATTERN_LOCATION, 1, cache.location); // cache hint try { - final Matcher matcherHint = patternHint.matcher(page); + final Matcher matcherHint = Constants.PATTERN_HINT.matcher(page); if (matcherHint.find() && matcherHint.group(1) != null) { // replace linebreak and paragraph tags String hint = Pattern.compile("<(br|p)[^>]*>").matcher(matcherHint.group(1)).replaceAll("\n"); @@ -1340,37 +1252,13 @@ public class cgBase { */ // cache personal note - try { - final Matcher matcherPersonalNote = patternPersonalNote.matcher(page); - if (matcherPersonalNote.find() && matcherPersonalNote.groupCount() > 0) { - cache.personalNote = getMatch(matcherPersonalNote.group(1)); - } - } catch (Exception e) { - // failed to parse cache personal note - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache personal note"); - } + cache.personalNote = BaseUtils.getMatch(page, Constants.PATTERN_PERSONALNOTE, 1, cache.personalNote); // cache short description - try { - final Matcher matcherDescShort = patternDescShort.matcher(page); - if (matcherDescShort.find() && matcherDescShort.groupCount() > 0) { - cache.shortdesc = getMatch(matcherDescShort.group(1)); - } - } catch (Exception e) { - // failed to parse short description - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache short description"); - } + cache.shortdesc = BaseUtils.getMatch(page, Constants.PATTERN_SHORTDESC, 1, cache.shortdesc); // cache description - try { - final Matcher matcherDesc = patternDesc.matcher(page); - if (matcherDesc.find() && matcherDesc.groupCount() > 0) { - cache.description = getMatch(matcherDesc.group(1)); - } - } catch (Exception e) { - // failed to parse short description - Log.w(cgSettings.tag, "cgeoBase.parseCache: Failed to parse cache description"); - } + cache.description = BaseUtils.getMatch(page, Constants.PATTERN_DESC, 1, cache.description); // cache attributes try { @@ -1772,16 +1660,6 @@ public class cgBase { } } - private static String getMatch(String match) { - // creating a new String via String constructor is necessary here!! - return new String(match.trim()); - // Java copies the whole page String, when matching with regular expressions - // later this would block the garbage collector, as we only need tiny parts of the page - // see http://developer.android.com/reference/java/lang/String.html#backing_array - - // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! - } - public Date parseGcCustomDate(final String input) throws ParseException { @@ -3410,12 +3288,12 @@ public class cgBase { /** * Replace the characters \n, \r and \t with a space. The input are complete HTML pages. * This method must be fast, but may not lead to the shortest replacement String. - * + * * @param buffer * The data */ public static String replaceWhitespace(final String data) { - // YOU ARE ONLY ALLOWED TO CHANGE THIS CODE IF YOU CAN PROVE IT BECAME FASTER ON A DEVICE + // You are only allowed to change this code if you can prove it became faster on a device. // see WhitespaceTest in the test project final int length = data.length(); final char[] chars = new char[length]; diff --git a/main/src/cgeo/geocaching/cgCache.java b/main/src/cgeo/geocaching/cgCache.java index c821e28..af3def1 100644 --- a/main/src/cgeo/geocaching/cgCache.java +++ b/main/src/cgeo/geocaching/cgCache.java @@ -452,6 +452,26 @@ public class cgCache implements ICache { return name; } + @Override + public String getCacheId() { + return cacheId; + } + + @Override + public String getGuid() { + return guid; + } + + @Override + public String getLocation() { + return location; + } + + @Override + public String getPersonalNote() { + return personalNote; + } + public boolean supportsUserActions() { return getConnector().supportsUserActions(); } diff --git a/main/src/cgeo/geocaching/utils/BaseUtils.java b/main/src/cgeo/geocaching/utils/BaseUtils.java new file mode 100644 index 0000000..5f4833b --- /dev/null +++ b/main/src/cgeo/geocaching/utils/BaseUtils.java @@ -0,0 +1,67 @@ +/** + * + */ +package cgeo.geocaching.utils; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Misc. utils + */ +public final class BaseUtils { + + /** + * Searches for the pattern p in the data for the n-th group. If the pattern + * is not found defaultValue is returned + * + * @param data + * @param p + * @param group + * @param defaultValue + * @return + */ + public static String getMatch(final String data, final Pattern p, final int group, final String defaultValue) { + final Matcher matcher = p.matcher(data); + if (matcher.find() && matcher.groupCount() >= group) { + // creating a new String via String constructor is necessary here!! + return new String(matcher.group(group).trim()); + // Java copies the whole page String, when matching with regular expressions + // later this would block the garbage collector, as we only need tiny parts of the page + // see http://developer.android.com/reference/java/lang/String.html#backing_array + + // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler! + } + return defaultValue; + } + + /** + * Replace the characters \n, \r and \t with a space + * The result is a very long single "line". + * Don't change this behavior - the patterns for parsing rely on this matter of fact ! + * + * @param buffer + * The data + */ + public static void replaceWhitespace(final StringBuffer buffer) { + final int length = buffer.length(); + final char[] chars = new char[length]; + buffer.getChars(0, length, chars, 0); + int resultSize = 0; + boolean lastWasWhitespace = false; + for (char c : chars) { + if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { + if (!lastWasWhitespace) { + chars[resultSize++] = ' '; + } + lastWasWhitespace = true; + } else { + chars[resultSize++] = c; + lastWasWhitespace = false; + } + } + buffer.setLength(0); + buffer.append(chars); + } + +} diff --git a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java new file mode 100644 index 0000000..2dbbc46 --- /dev/null +++ b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java @@ -0,0 +1,183 @@ +package cgeo.geocaching.test; + +import cgeo.geocaching.test.mock.GC1ZXX2; +import cgeo.geocaching.test.mock.GC2CJPF; +import cgeo.geocaching.test.mock.MockedCache; +import cgeo.geocaching.utils.BaseUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +/** + * Test class to compare the performance of two regular expressions on given data. + * Can be used to improve the time needed to parse the cache data + * Run As "JUnit Test" + * + * @author blafoo + */ +public class RegExPerformanceTest extends TestCase { + + // Regular expression: "<img.*src=(\S*)/>" + // Input string 1: "<img border=1 src=image.jpg />" + // Input string 2: "<img src=src=src=src= .... many src= ... src=src=" + // "a(.*)a", it's much better to use "a([^a]*)a". + // The rewritten expression "<img((?!src=).)*src=(\S*)/>" will handle a large, non-matching string almost a hundred times faster then the previous one! + + /** Search until the start of the next tag. The tag can follow immediately */ + public static final String NEXT_START_TAG = "[^<]*"; + /** Search until the end of the actual tag. The closing tag can follow immediately */ + public static final String NEXT_END_TAG = "[^>]*"; + + /** Search until the start of the next tag. The tag must not follow immediately */ + public static final String NEXT_START_TAG2 = "[^<]+"; + /** Search until the end of the actual tag. The closing tag must not follow immediately */ + public static final String NEXT_END_TAG2 = "[^>]+"; + + /** P tag */ + public static final String TAG_P_START = "<p>"; + /** Closing P tag **/ + public static final String TAG_P_END = "</p>"; + /** Search until the next <p> */ + public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START; + /** Search until the next </p> */ + public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END; + + /** strong tag */ + public static final String TAG_STRONG_START = "<strong>"; + /** Closing strong tag */ + public static final String TAG_STRONG_END = "</strong>"; + /** Search until the next <strong> */ + public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START; + /** Search until the next </strong> */ + public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END; + + /** div tag */ + public static final String TAG_DIV_START = "<div>"; + /** closing div tag */ + public static final String TAG_DIV_END = "</div>"; + /** Search until the next <div> */ + public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START; + /** Search until the next </div> */ + public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END; + + public final static Pattern PATTERN_DESCRIPTION_OLD = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\"[^>]*>" + "(.*)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>", Pattern.CASE_INSENSITIVE); + public final static Pattern PATTERN_DESCRIPTION = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\">(.*?)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>"); + + public final static Pattern PATTERN_HINT_OLD = Pattern.compile("<div id=\"div_hint\"[^>]*>(.*?)</div>", Pattern.CASE_INSENSITIVE); + public final static Pattern PATTERN_HINT = Pattern.compile("<div id=\"div_hint\"[^>]*>(.*?)</div>"); + + public final static Pattern PATTERN_SHORTDESC_OLD = Pattern.compile("<div class=\"UserSuppliedContent\">[^<]*<span id=\"ctl00_ContentBody_ShortDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^\\w^<]*</div>", Pattern.CASE_INSENSITIVE); + public final static Pattern PATTERN_SHORTDESC = Pattern.compile("<span id=\"ctl00_ContentBody_ShortDescription\">(.*?)</span>[^\\w^<]*</div>"); + + private final static Pattern PATTERN_GEOCODE_OLD = Pattern.compile("<meta name=\"og:url\" content=\"[^\"]+/(GC[0-9A-Z]+)\"[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_GEOCODE = Pattern.compile("<meta name=\"og:url\" content=\"[^\"]+/(GC[0-9A-Z]+)\""); + + private final static Pattern PATTERN_CACHEID_OLD = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_CACHEID = Pattern.compile("/seek/log\\.aspx\\?ID=(\\d+)"); + + private final static Pattern PATTERN_GUID_OLD = Pattern.compile(Pattern.quote("&wid=") + "([0-9a-z\\-]+)" + Pattern.quote("&"), Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_GUID = Pattern.compile(Pattern.quote("&wid=") + "([0-9a-z\\-]+)" + Pattern.quote("&")); + + private final static Pattern PATTERN_SIZE_OLD = Pattern.compile("<div class=\"CacheSize[^\"]*\">[^<]*<p[^>]*>[^S]*Size[^:]*:[^<]*<span[^>]*>[^<]*<img src=\"[^\"]*/icons/container/[a-z_]+\\.gif\" alt=\"Size: ([^\"]+)\"[^>]*>[^<]*<small>[^<]*</small>[^<]*</span>[^<]*</p>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_SIZE = Pattern.compile("<div class=\"CacheSize[^\"]*\">[^<]*<p[^>]*>[^S]*Size[^:]*:[^<]*<span[^>]*>[^<]*<img src=\"[^\"]*/icons/container/[a-z_]+\\.gif\" alt=\"Size: ([^\"]+)\"[^>]*>[^<]*<small>[^<]*</small>[^<]*</span>[^<]*</p>"); + + private final static Pattern PATTERN_LATLON_OLD = Pattern.compile("<span id=\"ctl00_ContentBody_LatLon\"[^>]*>([^<]*)<\\/span>", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_LATLON = Pattern.compile("<span id=\"ctl00_ContentBody_LatLon\"[^>]*>(.*?)</span>"); + + private final static Pattern PATTERN_LOCATION_OLD = Pattern.compile("<span id=\"ctl00_ContentBody_Location\"[^>]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_LOCATION = Pattern.compile("<span id=\"ctl00_ContentBody_Location\">In (.*?)</span>"); + + private final static Pattern PATTERN_PERSONALNOTE_OLD = Pattern.compile("<p id=\"cache_note\"[^>]*>([^<]*)</p>", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_PERSONALNOTE = Pattern.compile("<p id=\"cache_note\"[^>]*>(.*?)</p>"); + + private final static Pattern PATTERN_NAME_OLD = Pattern.compile("<h2[^>]*>[^<]*<span id=\"ctl00_ContentBody_CacheName\">([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_NAME = Pattern.compile("<span id=\"ctl00_ContentBody_CacheName\">(.*?)</span>"); + + private final static Pattern PATTERN_DIFFICULTY_OLD = Pattern.compile("<span id=\"ctl00_ContentBody_uxLegendScale\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_DIFFICULTY = Pattern.compile("<span id=\"ctl00_ContentBody_uxLegendScale\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\""); + + private final static Pattern PATTERN_TERRAIN_OLD = Pattern.compile("<span id=\"ctl00_ContentBody_Localize6\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\"[^\"]+\"[^>]*>[^<]*</span>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + private final static Pattern PATTERN_TERRAIN = Pattern.compile("<span id=\"ctl00_ContentBody_Localize6\"[^>]*>[^<]*<img src=\"[^\"]*/images/stars/stars([0-9_]+)\\.gif\" alt=\""); + + private final static Pattern PATTERN_OWNERREAL_OLD = Pattern.compile("<a id=\"ctl00_ContentBody_uxFindLinksHiddenByThisUser\" href=\"[^\"]*/seek/nearest\\.aspx\\?u=*([^\"]+)\">[^<]+</a>", Pattern.CASE_INSENSITIVE); + private final static Pattern PATTERN_OWNERREAL = Pattern.compile("<a id=\"ctl00_ContentBody_uxFindLinksHiddenByThisUser\" href=\"[^\"]*/seek/nearest\\.aspx\\?u=(.*?)\""); + + + public void testRegEx() { + List<String> output = doTheTests(10); + + for (String s : output) { + System.out.println(s); + } + } + + public static List<String> doTheTests(final int iterations) { + + List<String> output = new ArrayList<String>(); + + output.addAll(measure(iterations, "hint", PATTERN_HINT_OLD, PATTERN_HINT)); + output.addAll(measure(iterations, "description", PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION)); + output.addAll(measure(iterations, "short description", PATTERN_SHORTDESC_OLD, PATTERN_SHORTDESC)); + output.addAll(measure(iterations, "geocode", PATTERN_GEOCODE_OLD, PATTERN_GEOCODE)); + output.addAll(measure(iterations, "cache id", PATTERN_CACHEID_OLD, PATTERN_CACHEID)); + output.addAll(measure(iterations, "cache guid", PATTERN_GUID_OLD, PATTERN_GUID)); + output.addAll(measure(iterations, "size", PATTERN_SIZE_OLD, PATTERN_SIZE)); + output.addAll(measure(iterations, "latlon", PATTERN_LATLON_OLD, PATTERN_LATLON)); + output.addAll(measure(iterations, "location", PATTERN_LOCATION_OLD, PATTERN_LOCATION)); + output.addAll(measure(iterations, "personal note", PATTERN_PERSONALNOTE_OLD, PATTERN_PERSONALNOTE)); + output.addAll(measure(iterations, "name", PATTERN_NAME_OLD, PATTERN_NAME)); + output.addAll(measure(iterations, "difficulty", PATTERN_DIFFICULTY_OLD, PATTERN_DIFFICULTY)); + output.addAll(measure(iterations, "terrain", PATTERN_TERRAIN_OLD, PATTERN_TERRAIN)); + output.addAll(measure(iterations, "owner real", PATTERN_OWNERREAL_OLD, PATTERN_OWNERREAL)); + + return output; + } + + private static List<String> measure(int iterations, String fieldName, Pattern p1, Pattern p2) { + + List<String> output = new ArrayList<String>(); + output.add(fieldName + ":"); + + List<MockedCache> cachesForParsing = new ArrayList<MockedCache>(); + cachesForParsing.add(new GC2CJPF()); + cachesForParsing.add(new GC1ZXX2()); + + for (MockedCache cache : cachesForParsing) { + String page = cache.getData(); + String result1 = BaseUtils.getMatch(page, p1, 1, ""); + String result2 = BaseUtils.getMatch(page, p2, 1, ""); + assertEquals(result1, result2); + + long diff1, diff2; + + output.add("Parsing " + cache.getGeocode() + " " + cache.getName()); + { + diff1 = parse(page, p1, iterations); + output.add("Time pattern 1:\t" + diff1 + " ms"); + } + + { + diff2 = parse(page, p2, iterations); + output.add("Time pattern 2:\t" + diff2 + " ms"); + } + Float reduction = new Float((float) diff2 * 100 / (float) diff1); + output.add("New runtime:\t" + String.format("%.1f", reduction) + "%\n"); + } + + return output; + + } + + private static long parse(String page, Pattern pattern, int iterations) { + long start = System.currentTimeMillis(); + for (int j = 0; j < iterations; j++) { + BaseUtils.getMatch(page, pattern, 1, ""); + } + return (System.currentTimeMillis() - start); + + } + +} diff --git a/tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java new file mode 100644 index 0000000..eda2f6d --- /dev/null +++ b/tests/src/cgeo/geocaching/test/RegExRealPerformanceTest.java @@ -0,0 +1,27 @@ +package cgeo.geocaching.test; + +import cgeo.geocaching.cgSettings; + +import android.test.AndroidTestCase; +import android.util.Log; + +import java.util.List; + +/** + * Test class to compare the performance of two regular expressions on given data. + * Can be used to improve the time needed to parse the cache data + * + * @author blafoo + */ +public class RegExRealPerformanceTest extends AndroidTestCase { + + public void testRegEx() { + + List<String> output = RegExPerformanceTest.doTheTests(10); + + for (String s : output) { + Log.w(cgSettings.tag, s); + } + + } +} diff --git a/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java b/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java index 00661c3..46bc1e5 100644 --- a/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java +++ b/tests/src/cgeo/geocaching/test/cgeoApplicationTest.java @@ -102,6 +102,10 @@ public class cgeoApplicationTest extends ApplicationTestCase<cgeoapplication> { Assert.assertTrue(cacheParsed.getDescription().startsWith(cache.getDescription())); Assert.assertEquals(cache.getShortDescription(), cacheParsed.getShortDescription()); Assert.assertEquals(cache.getName(), cacheParsed.getName()); + Assert.assertEquals(cache.getCacheId(), cacheParsed.getCacheId()); + Assert.assertEquals(cache.getGuid(), cacheParsed.getGuid()); + Assert.assertEquals(cache.getLocation(), cacheParsed.getLocation()); + Assert.assertEquals(cache.getPersonalNote(), cacheParsed.getPersonalNote()); } } diff --git a/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java b/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java index 6141af4..de9b820 100644 --- a/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java +++ b/tests/src/cgeo/geocaching/test/mock/GC1ZXX2.java @@ -82,7 +82,27 @@ public class GC1ZXX2 extends MockedCache { @Override public String getName() { - return "Hannopoly: Eislisenstrasse "; + return "Hannopoly: Eislisenstrasse"; + } + + @Override + public String getCacheId() { + return "1433909"; + } + + @Override + public String getGuid() { + return "36d45871-b99d-46d6-95fc-ff86ab564c98"; + } + + @Override + public String getLocation() { + return "Niedersachsen, Germany"; + } + + @Override + public String getPersonalNote() { + return ""; } } diff --git a/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java b/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java index 79a51e4..eb8d315 100644 --- a/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java +++ b/tests/src/cgeo/geocaching/test/mock/GC2CJPF.java @@ -85,4 +85,24 @@ public class GC2CJPF extends MockedCache { return "Kinderwald KiC"; } + @Override + public String getCacheId() { + return "1811409"; + } + + @Override + public String getGuid() { + return "73246a5a-ebb9-4d4f-8db9-a951036f5376"; + } + + @Override + public String getLocation() { + return "Niedersachsen, Germany"; + } + + @Override + public String getPersonalNote() { + return ""; + } + } diff --git a/tests/src/cgeo/geocaching/test/mock/MockedCache.java b/tests/src/cgeo/geocaching/test/mock/MockedCache.java index 86cb5fe..e8384fb 100644 --- a/tests/src/cgeo/geocaching/test/mock/MockedCache.java +++ b/tests/src/cgeo/geocaching/test/mock/MockedCache.java @@ -34,6 +34,7 @@ public abstract class MockedCache implements ICache { buffer.append(line).append('\n'); } + br.close(); return cgBase.replaceWhitespace(buffer.toString()); } catch (IOException e) { |