aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--main/src/cgeo/geocaching/Constants.java53
-rw-r--r--main/src/cgeo/geocaching/cgBase.java69
-rw-r--r--main/src/cgeo/geocaching/utils/BaseUtils.java48
-rw-r--r--tests/src/cgeo/geocaching/test/RegExPerformanceTest.java104
-rw-r--r--tests/src/cgeo/geocaching/test/mock/MockedCache.java5
5 files changed, 224 insertions, 55 deletions
diff --git a/main/src/cgeo/geocaching/Constants.java b/main/src/cgeo/geocaching/Constants.java
new file mode 100644
index 0000000..3c2f74a
--- /dev/null
+++ b/main/src/cgeo/geocaching/Constants.java
@@ -0,0 +1,53 @@
+package cgeo.geocaching;
+
+import java.util.regex.Pattern;
+
+/**
+ * Regular-expression building blocks and precompiled patterns used to pick
+ * pieces out of an HTML page.
+ * <p>
+ * For further information about patterns have a look at
+ * http://download.oracle.com/javase/1.4.2/docs/api/java/util/regex/Pattern.html
+ * <p>
+ * All patterns here are compiled without flags, so they are case-sensitive and
+ * {@code .} does not match line terminators.
+ */
+public final class Constants {
+
+    /** Search until the start of the next tag. The tag can follow immediately */
+    public static final String NEXT_START_TAG = "[^<]*";
+    /** Search until the end of the actual tag. The closing tag can follow immediately */
+    public static final String NEXT_END_TAG = "[^>]*";
+
+    /** Search until the start of the next tag. The tag must not follow immediately */
+    public static final String NEXT_START_TAG2 = "[^<]+";
+    /** Search until the end of the actual tag. The closing tag must not follow immediately */
+    public static final String NEXT_END_TAG2 = "[^>]+";
+
+    /** P tag */
+    public static final String TAG_P_START = "<p>";
+    /** Closing P tag */
+    public static final String TAG_P_END = "</p>";
+    /** Skips any text up to the next tag, which has to be &lt;p&gt; */
+    public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START;
+    /** Skips any text up to the next tag, which has to be &lt;/p&gt; */
+    public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END;
+
+    /** strong tag */
+    public static final String TAG_STRONG_START = "<strong>";
+    /** Closing strong tag */
+    public static final String TAG_STRONG_END = "</strong>";
+    /** Skips any text up to the next tag, which has to be &lt;strong&gt; */
+    public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START;
+    /** Skips any text up to the next tag, which has to be &lt;/strong&gt; */
+    public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END;
+
+    /** div tag */
+    public static final String TAG_DIV_START = "<div>";
+    /** Closing div tag */
+    public static final String TAG_DIV_END = "</div>";
+    /** Skips any text up to the next tag, which has to be &lt;div&gt; */
+    public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START;
+    /** Skips any text up to the next tag, which has to be &lt;/div&gt; */
+    public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END;
+
+    /**
+     * Finds the hint block. The match is anchored on the "Additional Hints" heading and
+     * its "Encrypt" link, and must be followed by the {@code <div id='dk'} element.
+     * Group 1 is the content of the {@code div_hint} element.
+     */
+    public static final Pattern PATTERN_HINT = Pattern.compile(
+            "Additional Hints" + TAG_STRONG_END + "[^\\(]*\\(<a" + NEXT_END_TAG2 + ">Encrypt</a>\\)" + TAG_P_END +
+                    NEXT_START_TAG + "<div id=\"div_hint\"" + NEXT_END_TAG + ">(.*?)" + TAG_DIV_END + NEXT_START_TAG + "<div id='dk'");
+
+    /**
+     * Finds the long description. Group 1 is the content of the
+     * {@code ctl00_ContentBody_LongDescription} span; the span has to be followed by
+     * {@code </div>}, an empty paragraph and the "Additional Hints" heading.
+     */
+    public static final Pattern PATTERN_DESC = Pattern.compile(
+            "<span id=\"ctl00_ContentBody_LongDescription\">(.*?)</span>" + TAG_DIV_END_NEXT + TAG_P_START_NEXT + TAG_P_END_NEXT
+                    + TAG_P_START_NEXT + TAG_STRONG_START_NEXT + "\\W*Additional Hints" + TAG_STRONG_END);
+
+    /** Constant holder, not meant to be instantiated. */
+    private Constants() {
+        // static members only
+    }
+
+}
diff --git a/main/src/cgeo/geocaching/cgBase.java b/main/src/cgeo/geocaching/cgBase.java
index 328a4d4..f8e0c24 100644
--- a/main/src/cgeo/geocaching/cgBase.java
+++ b/main/src/cgeo/geocaching/cgBase.java
@@ -8,6 +8,7 @@ import cgeo.geocaching.enumerations.WaypointType;
import cgeo.geocaching.files.LocParser;
import cgeo.geocaching.geopoint.DistanceParser;
import cgeo.geocaching.geopoint.Geopoint;
+import cgeo.geocaching.utils.BaseUtils;
import cgeo.geocaching.utils.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
@@ -100,10 +101,8 @@ public class cgBase {
private final static Pattern patternFoundAlternative = Pattern.compile("<div class=\"StatusInformationWidget FavoriteWidget\"", Pattern.CASE_INSENSITIVE);
private final static Pattern patternLatLon = Pattern.compile("<span id=\"ctl00_ContentBody_LatLon\"[^>]*>(<b>)?([^<]*)(<\\/b>)?<\\/span>", Pattern.CASE_INSENSITIVE);
private final static Pattern patternLocation = Pattern.compile("<span id=\"ctl00_ContentBody_Location\"[^>]*>In ([^<]*)", Pattern.CASE_INSENSITIVE);
- private final static Pattern patternHint = Pattern.compile("<div id=\"div_hint\"[^>]*>(.*?)</div>", Pattern.CASE_INSENSITIVE);
private final static Pattern patternPersonalNote = Pattern.compile("<p id=\"cache_note\"[^>]*>([^<]*)</p>", Pattern.CASE_INSENSITIVE);
private final static Pattern patternDescShort = Pattern.compile("<div class=\"UserSuppliedContent\">[^<]*<span id=\"ctl00_ContentBody_ShortDescription\"[^>]*>((?:(?!</span>[^\\w^<]*</div>).)*)</span>[^\\w^<]*</div>", Pattern.CASE_INSENSITIVE);
- private final static Pattern patternDesc = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\"[^>]*>" + "(.*)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>", Pattern.CASE_INSENSITIVE);
private final static Pattern patternCountLogs = Pattern.compile("<span id=\"ctl00_ContentBody_lblFindCounts\"><p(.+?)<\\/p><\\/span>", Pattern.CASE_INSENSITIVE);
private final static Pattern patternCountLog = Pattern.compile("src=\"\\/images\\/icons\\/(.+?).gif\"[^>]+> (\\d*[,.]?\\d+)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
private final static Pattern patternAttributes = Pattern.compile("<h3 class=\"WidgetHeader\">[^<]*<img[^>]+>\\W*Attributes[^<]*</h3>[^<]*<div class=\"WidgetBody\">(([^<]*<img src=\"[^\"]+\" alt=\"[^\"]+\"[^>]*>)+)[^<]*<p", Pattern.CASE_INSENSITIVE);
@@ -1091,7 +1090,7 @@ public class cgBase {
try {
final Matcher matcherGeocode = patternGeocode.matcher(page);
if (matcherGeocode.find() && matcherGeocode.groupCount() > 0) {
- cache.geocode = getMatch(matcherGeocode.group(1));
+ cache.geocode = BaseUtils.getMatch(matcherGeocode.group(1));
}
} catch (Exception e) {
// failed to parse cache geocode
@@ -1102,7 +1101,7 @@ public class cgBase {
try {
final Matcher matcherCacheId = patternCacheId.matcher(page);
if (matcherCacheId.find() && matcherCacheId.groupCount() > 0) {
- cache.cacheId = getMatch(matcherCacheId.group(1));
+ cache.cacheId = BaseUtils.getMatch(matcherCacheId.group(1));
}
} catch (Exception e) {
// failed to parse cache id
@@ -1113,7 +1112,7 @@ public class cgBase {
try {
final Matcher matcherCacheGuid = patternCacheGuid.matcher(page);
if (matcherCacheGuid.find() && matcherCacheGuid.groupCount() > 0) {
- cache.guid = getMatch(matcherCacheGuid.group(1));
+ cache.guid = BaseUtils.getMatch(matcherCacheGuid.group(1));
}
} catch (Exception e) {
// failed to parse cache guid
@@ -1239,7 +1238,7 @@ public class cgBase {
try {
final Matcher matcherSize = patternSize.matcher(tableInside);
if (matcherSize.find() && matcherSize.groupCount() > 0) {
- cache.size = CacheSize.FIND_BY_ID.get(getMatch(matcherSize.group(1)).toLowerCase());
+ cache.size = CacheSize.FIND_BY_ID.get(BaseUtils.getMatch(matcherSize.group(1)).toLowerCase());
}
} catch (Exception e) {
// failed to parse size
@@ -1274,7 +1273,7 @@ public class cgBase {
try {
final Matcher matcherLatLon = patternLatLon.matcher(page);
if (matcherLatLon.find() && matcherLatLon.groupCount() > 0) {
- cache.latlon = getMatch(matcherLatLon.group(2)); // first is <b>
+ cache.latlon = BaseUtils.getMatch(matcherLatLon.group(2)); // first is <b>
Map<String, Object> tmp = cgBase.parseLatlon(cache.latlon);
if (tmp.size() > 0) {
@@ -1294,7 +1293,7 @@ public class cgBase {
try {
final Matcher matcherLocation = patternLocation.matcher(page);
if (matcherLocation.find() && matcherLocation.groupCount() > 0) {
- cache.location = getMatch(matcherLocation.group(1));
+ cache.location = BaseUtils.getMatch(matcherLocation.group(1));
}
} catch (Exception e) {
// failed to parse location
@@ -1303,7 +1302,7 @@ public class cgBase {
// cache hint
try {
- final Matcher matcherHint = patternHint.matcher(page);
+ final Matcher matcherHint = Constants.PATTERN_HINT.matcher(page);
if (matcherHint.find() && matcherHint.group(1) != null) {
// replace linebreak and paragraph tags
String hint = Pattern.compile("<(br|p)[^>]*>").matcher(matcherHint.group(1)).replaceAll("\n");
@@ -1346,7 +1345,7 @@ public class cgBase {
try {
final Matcher matcherPersonalNote = patternPersonalNote.matcher(page);
if (matcherPersonalNote.find() && matcherPersonalNote.groupCount() > 0) {
- cache.personalNote = getMatch(matcherPersonalNote.group(1));
+ cache.personalNote = BaseUtils.getMatch(matcherPersonalNote.group(1));
}
} catch (Exception e) {
// failed to parse cache personal note
@@ -1357,7 +1356,7 @@ public class cgBase {
try {
final Matcher matcherDescShort = patternDescShort.matcher(page);
if (matcherDescShort.find() && matcherDescShort.groupCount() > 0) {
- cache.shortdesc = getMatch(matcherDescShort.group(1));
+ cache.shortdesc = BaseUtils.getMatch(matcherDescShort.group(1));
}
} catch (Exception e) {
// failed to parse short description
@@ -1366,9 +1365,9 @@ public class cgBase {
// cache description
try {
- final Matcher matcherDesc = patternDesc.matcher(page);
+ final Matcher matcherDesc = Constants.PATTERN_DESC.matcher(page);
if (matcherDesc.find() && matcherDesc.groupCount() > 0) {
- cache.description = getMatch(matcherDesc.group(1));
+ cache.description = BaseUtils.getMatch(matcherDesc.group(1));
}
} catch (Exception e) {
// failed to parse short description
@@ -1775,16 +1774,6 @@ public class cgBase {
}
}
- private static String getMatch(String match) {
- // creating a new String via String constructor is necessary here!!
- return new String(match.trim());
- // Java copies the whole page String, when matching with regular expressions
- // later this would block the garbage collector, as we only need tiny parts of the page
- // see http://developer.android.com/reference/java/lang/String.html#backing_array
-
- // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler!
- }
-
public Date parseGcCustomDate(String input)
throws ParseException
{
@@ -3745,7 +3734,7 @@ public class cgBase {
"GET", new HashMap<String, String>(), requestId, false, false, false);
} else {
if (StringUtils.isNotEmpty(buffer)) {
- replaceWhitespace(buffer);
+ BaseUtils.replaceWhitespace(buffer);
String data = buffer.toString();
buffer = null;
@@ -3766,32 +3755,6 @@ public class cgBase {
return response;
}
- /**
- * Replace the characters \n, \r and \t with a space
- *
- * @param buffer
- * The data
- */
- public static void replaceWhitespace(final StringBuffer buffer) {
- final int length = buffer.length();
- final char[] chars = new char[length];
- buffer.getChars(0, length, chars, 0);
- int resultSize = 0;
- boolean lastWasWhitespace = false;
- for (char c : chars) {
- if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
- if (!lastWasWhitespace) {
- chars[resultSize++] = ' ';
- }
- lastWasWhitespace = true;
- } else {
- chars[resultSize++] = c;
- lastWasWhitespace = false;
- }
- }
- buffer.setLength(0);
- buffer.append(chars);
- }
public String requestJSONgc(final URI uri, String params) {
int httpCode = -1;
@@ -3879,7 +3842,7 @@ public class cgBase {
final URI newLocation = uri.resolve(httpLocation);
page = requestJSONgc(newLocation, params);
} else {
- replaceWhitespace(buffer);
+ BaseUtils.replaceWhitespace(buffer);
page = buffer.toString();
}
@@ -4029,7 +3992,7 @@ public class cgBase {
* }
* } else {
*/
- replaceWhitespace(buffer);
+ BaseUtils.replaceWhitespace(buffer);
page = buffer.toString();
//}
@@ -4604,7 +4567,7 @@ public class cgBase {
/**
* Generate a numeric date and time string according to system-wide settings (locale,
* date format) such as "7 sept. à 12:35".
- *
+ *
* @param context
* a Context
* @param date
diff --git a/main/src/cgeo/geocaching/utils/BaseUtils.java b/main/src/cgeo/geocaching/utils/BaseUtils.java
new file mode 100644
index 0000000..2025765
--- /dev/null
+++ b/main/src/cgeo/geocaching/utils/BaseUtils.java
@@ -0,0 +1,48 @@
+/**
+ *
+ */
+package cgeo.geocaching.utils;
+
+/**
+ * Misc. utils
+ */
+public final class BaseUtils {
+
+    /** Utility class, not meant to be instantiated. */
+    private BaseUtils() {
+        // static members only
+    }
+
+    /**
+     * Replace every run of the characters space, \n, \r and \t with a single space.
+     * The buffer is modified in place and becomes shorter when runs are collapsed.
+     *
+     * @param buffer
+     *            The data
+     */
+    public static void replaceWhitespace(final StringBuffer buffer) {
+        final int length = buffer.length();
+        final char[] chars = new char[length];
+        buffer.getChars(0, length, chars, 0);
+        // Compact in place: the write index never overtakes the read position.
+        int resultSize = 0;
+        boolean lastWasWhitespace = false;
+        for (char c : chars) {
+            if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
+                if (!lastWasWhitespace) {
+                    chars[resultSize++] = ' ';
+                }
+                lastWasWhitespace = true;
+            } else {
+                chars[resultSize++] = c;
+                lastWasWhitespace = false;
+            }
+        }
+        buffer.setLength(0);
+        // Only the compacted prefix is valid; everything behind it is stale input.
+        buffer.append(chars, 0, resultSize);
+    }
+
+    /**
+     * Return a trimmed copy of a matched string.
+     * <p>
+     * Creating a new String via the String constructor is necessary here: a string
+     * obtained from a regular expression match can share the backing array of the
+     * whole page, which would keep the page from being garbage collected although
+     * only tiny parts of it are needed, see
+     * http://developer.android.com/reference/java/lang/String.html#backing_array
+     * <p>
+     * That effect cannot be seen in the debugger, only with a separate memory profiler.
+     *
+     * @param match
+     *            the matched text, must not be null
+     * @return an independent copy of the text without leading and trailing whitespace
+     */
+    public static String getMatch(String match) {
+        return new String(match.trim());
+    }
+
+}
diff --git a/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java
new file mode 100644
index 0000000..277ec32
--- /dev/null
+++ b/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java
@@ -0,0 +1,104 @@
+package cgeo.geocaching.test;
+
+import cgeo.geocaching.Constants;
+import cgeo.geocaching.test.mock.GC1ZXX2;
+import cgeo.geocaching.test.mock.GC2CJPF;
+import cgeo.geocaching.test.mock.MockedCache;
+import cgeo.geocaching.utils.BaseUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+/**
+ * Compares the performance of two regular expressions on the same mocked cache pages.
+ * Useful when trying to reduce the time needed to parse the cache data.
+ * Run As "JUnit Test"
+ *
+ * @author blafoo
+ */
+public class RegExPerformanceTest extends TestCase {
+
+    // Background on why a more specific pattern is faster:
+    // Regular expression: "<img.*src=(\S*)/>"
+    // Input string 1: "<img border=1 src=image.jpg />"
+    // Input string 2: "<img src=src=src=src= .... many src= ... src=src="
+    // Instead of "a(.*)a" it is much better to use "a([^a]*)a".
+    // The rewritten expression "<img((?!src=).)*src=(\S*)/>" will handle a large, non-matching string almost a hundred times faster than the previous one!
+
+    /** The hint pattern this test uses as the baseline. */
+    private final static Pattern PATTERN_ACTUAL = Pattern.compile("<div id=\"div_hint\"[^>]*>(.*?)</div>", Pattern.CASE_INSENSITIVE);
+
+    /** The candidate replacement, anchored on the text surrounding the hint block. */
+    private static final Pattern PATTERN_IMPROVED = Pattern.compile(
+            "Additional Hints" + Constants.TAG_STRONG_END +
+                    "[^\\(]*\\(<a" + Constants.NEXT_END_TAG2 + ">Encrypt</a>\\)" + Constants.TAG_P_END +
+                    Constants.NEXT_START_TAG + "<div id=\"div_hint\"" + Constants.NEXT_END_TAG + ">(.*?)" + Constants.TAG_DIV_END + Constants.NEXT_START_TAG + "<div id='dk'");
+
+    /**
+     * Extracts the hint from a page.
+     *
+     * @param data
+     *            the page
+     * @param p
+     *            the pattern to search with
+     * @param group
+     *            the capturing group holding the hint
+     * @return the hint with br/p tags turned into line breaks, or "" if nothing was found
+     */
+    private String parseHint(String data, Pattern p, int group) {
+        final Matcher hintMatcher = p.matcher(data);
+        if (!hintMatcher.find() || hintMatcher.groupCount() < group || hintMatcher.group(group) == null) {
+            return "";
+        }
+        // replace linebreak and paragraph tags
+        final Pattern lineBreakTags = Pattern.compile("<(br|p)" + Constants.NEXT_END_TAG + ">");
+        final String withLineBreaks = lineBreakTags.matcher(hintMatcher.group(group)).replaceAll("\n");
+        return withLineBreaks.replaceAll(Pattern.quote(Constants.TAG_P_END), "").trim();
+    }
+
+    /**
+     * Extracts a description from a page.
+     *
+     * @return the trimmed content of the given group, or null if the pattern does not match
+     */
+    private String parseDescription(String data, Pattern p, int group) {
+        final Matcher descriptionMatcher = p.matcher(data);
+        if (descriptionMatcher.find() && descriptionMatcher.groupCount() >= group) {
+            return BaseUtils.getMatch(descriptionMatcher.group(group));
+        }
+        return null;
+    }
+
+    /**
+     * Runs the hint parsing repeatedly and measures the wall clock time.
+     *
+     * @return elapsed time in milliseconds
+     */
+    private long timeHintParsing(String page, Pattern pattern, int iterations) {
+        final long begin = System.currentTimeMillis();
+        for (int run = 0; run < iterations; run++) {
+            parseHint(page, pattern, 1);
+        }
+        return System.currentTimeMillis() - begin;
+    }
+
+    /**
+     * Checks that both patterns extract the same hint from every mocked cache and
+     * prints how long each of them needs.
+     */
+    public void testRegEx() {
+
+        final List<MockedCache> mockedCaches = new ArrayList<MockedCache>();
+        mockedCaches.add(new GC2CJPF());
+        mockedCaches.add(new GC1ZXX2());
+
+        final int iterations = 250; // 250 for a fast evaluation, 10000 otherwise
+
+        for (MockedCache mocked : mockedCaches) {
+            final String page = mocked.getData();
+            final String hintActual = parseHint(page, PATTERN_ACTUAL, 1);
+            final String hintImproved = parseHint(page, PATTERN_IMPROVED, 1);
+            assertEquals(hintActual, hintImproved);
+
+            System.out.println("Parsing " + mocked.getGeocode() + " " + mocked.getName());
+
+            System.out.println("Result actual pattern:\t<<" + hintActual + ">>");
+            final long millisActual = timeHintParsing(page, PATTERN_ACTUAL, iterations);
+            System.out.println("Time actual pattern:\t" + millisActual + " ms");
+
+            System.out.println("Result new pattern:\t<<" + hintImproved + ">>");
+            final long millisImproved = timeHintParsing(page, PATTERN_IMPROVED, iterations);
+            System.out.println("Time new pattern:\t" + millisImproved + " ms");
+
+            final float reduction = (float) millisImproved * 100 / (float) millisActual;
+            System.out.println("Reduction to x percent:\t" + Float.toString(reduction) + "\n");
+        }
+
+    }
+}
diff --git a/tests/src/cgeo/geocaching/test/mock/MockedCache.java b/tests/src/cgeo/geocaching/test/mock/MockedCache.java
index 7494028..8c04a18 100644
--- a/tests/src/cgeo/geocaching/test/mock/MockedCache.java
+++ b/tests/src/cgeo/geocaching/test/mock/MockedCache.java
@@ -1,7 +1,7 @@
package cgeo.geocaching.test.mock;
import cgeo.geocaching.ICache;
-import cgeo.geocaching.cgBase;
+import cgeo.geocaching.utils.BaseUtils;
import java.io.BufferedReader;
import java.io.IOException;
@@ -27,9 +27,10 @@ public abstract class MockedCache implements ICache {
buffer.append(line).append("\n");
}
+
br.close();
- cgBase.replaceWhitespace(buffer);
+ BaseUtils.replaceWhitespace(buffer);
return buffer.toString();
} catch (IOException e) {