package cgeo.geocaching.test;

import cgeo.geocaching.test.mock.GC1ZXX2;
import cgeo.geocaching.test.mock.GC2CJPF;
import cgeo.geocaching.test.mock.GC2JVEH;
import cgeo.geocaching.test.mock.GC3XX5J;
import cgeo.geocaching.test.mock.MockedCache;
import cgeo.geocaching.utils.TextUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import junit.framework.TestCase;

/**
 * Test class to compare the performance of two regular expressions on given data.
 * Can be used to improve the time needed to parse the cache data.
 * Run As "JUnit Test"
 */
public class RegExPerformanceTest extends TestCase {

    // NOTE(review): the three quoted example values below are empty in this copy of the
    // file; they look like they were lost together with other markup. Restore them from
    // the upstream file before relying on this comment.
    //
    // Regular expression: ""
    // Input string 1: ""
    // Input string 2: ""
    // will handle a large, non-matching string almost a hundred times faster than the previous one!

    /** Search until the start of the next tag. The tag can follow immediately */
    public static final String NEXT_START_TAG = "[^<]*";
    /** Search until the end of the actual tag. The closing tag can follow immediately */
    public static final String NEXT_END_TAG = "[^>]*";

    /** Search until the start of the next tag. The tag must not follow immediately */
    public static final String NEXT_START_TAG2 = "[^<]+";
    /** Search until the end of the actual tag. The closing tag must not follow immediately */
    public static final String NEXT_END_TAG2 = "[^>]+";

    /** P tag */
    public static final String TAG_P_START = "<p>";
    /** Closing P tag **/
    public static final String TAG_P_END = "</p>";
    /** Search until the next {@code <p>} */
    public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START;
    /** Search until the next {@code </p>} */
    public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END;

    /** strong tag */
    public static final String TAG_STRONG_START = "<strong>";
    /** Closing strong tag */
    public static final String TAG_STRONG_END = "</strong>";
    /** Search until the next {@code <strong>} */
    public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START;
    /** Search until the next {@code </strong>} */
    public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END;

    /** div tag */
    public static final String TAG_DIV_START = "<div>";
    /** closing div tag */
    public static final String TAG_DIV_END = "</div>";
    /** Search until the next {@code <div>} */
    public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START;
    /** Search until the next {@code </div>} */
    public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END;

    // NOTE(review): the two description patterns below are reproduced character for
    // character from a copy of this file in which the literals were broken across lines;
    // the line breaks are written as \n so that the class compiles. The leading "]*>" and
    // the runs of "[^<]*" separated only by line breaks suggest that HTML tags were
    // stripped out of these literals — presumably the same kind of tags as the constants
    // above. TODO: restore both patterns from the upstream file; as written they are
    // unlikely to match real cache pages, which makes the comparison below meaningless.

    /** Baseline ("old") description pattern, compiled case-insensitively. */
    public static final Pattern PATTERN_DESCRIPTION_OLD = Pattern.compile("]*>" + "(.*)[^<]*[^<]*\n\n[^<]*\n\n[^<]*\n\n[^<]*\\W*Additional Hints", Pattern.CASE_INSENSITIVE);

    /** Candidate ("new") description pattern; uses a reluctant group and is case-sensitive. */
    public static final Pattern PATTERN_DESCRIPTION = Pattern.compile("(.*?)[^<]*[^<]*\n\n[^<]*\n\n[^<]*\n\n[^<]*\\W*Additional Hints");

    /** Mocked caches whose page data is used as benchmark input. */
    public static final List<MockedCache> MOCKED_CACHES;
    static {
        // Explicit type argument keeps this compiling on compilers without target typing.
        MOCKED_CACHES = Arrays.<MockedCache> asList(new GC2CJPF(), new GC1ZXX2(), new GC2JVEH(), new GC3XX5J());
    }

    /**
     * Runs the comparison with 10 iterations per pattern and prints every report line
     * to standard output.
     */
    public static void testRegEx() {
        final List<String> output = doTheTests(10);
        for (String s : output) {
            System.out.println(s);
        }
    }

    /**
     * Runs all pattern comparisons.
     *
     * @param iterations
     *            how many times each pattern is applied to each page while timing
     * @return the report lines produced by the comparisons
     */
    public static List<String> doTheTests(final int iterations) {
        final List<String> output = new ArrayList<String>();
        output.addAll(measure(iterations, "description", PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION));
        return output;
    }

    /**
     * Compares two patterns on the page data of every mocked cache.
     * <p>
     * For each cache the result of both patterns is asserted to be equal before the
     * timing is taken, so a faster pattern that extracts something different fails the test.
     *
     * @param iterations
     *            how many times each pattern is applied while timing
     * @param fieldName
     *            label used as the heading of the report
     * @param p1
     *            the baseline pattern
     * @param p2
     *            the pattern to compare against the baseline
     * @return the report lines: heading, then per cache the name, both timings and the
     *         runtime of {@code p2} as a percentage of {@code p1}
     */
    private static List<String> measure(int iterations, String fieldName, Pattern p1, Pattern p2) {
        final List<String> output = new ArrayList<String>();
        output.add(fieldName + ":");

        for (MockedCache cache : MOCKED_CACHES) {
            final String page = cache.getData();
            final String result1 = TextUtils.getMatch(page, p1, true, "");
            final String result2 = TextUtils.getMatch(page, p2, true, "");
            assertEquals(result1, result2);

            output.add("Parsing " + cache.getGeocode() + " " + cache.getName());
            final long diff1 = parse(page, p1, iterations);
            output.add("Time pattern 1:\t" + diff1 + " ms");
            final long diff2 = parse(page, p2, iterations);
            output.add("Time pattern 2:\t" + diff2 + " ms");

            if (diff1 > 0) {
                final float reduction = (float) diff2 * 100 / diff1;
                output.add("New runtime:\t" + String.format("%.1f", reduction) + "%\n");
            } else {
                // A baseline below one millisecond would otherwise print "Infinity%" or "NaN%".
                output.add("New runtime:\tn/a\n");
            }
        }

        return output;
    }

    /**
     * Applies a pattern repeatedly to a page and measures the elapsed time.
     *
     * @param page
     *            the text to run the pattern against
     * @param pattern
     *            the pattern to time
     * @param iterations
     *            number of repetitions
     * @return elapsed time in whole milliseconds
     */
    private static long parse(String page, Pattern pattern, int iterations) {
        // nanoTime is meant for measuring elapsed time; currentTimeMillis follows the wall clock.
        final long start = System.nanoTime();
        for (int j = 0; j < iterations; j++) {
            TextUtils.getMatch(page, pattern, true, "");
        }
        return (System.nanoTime() - start) / 1000000L;
    }
}