/*
 * NOTE(review): this copy of the file appears to be damaged by markup stripping. The HTML tags
 * that the regular-expression literals below are meant to match are missing (several literals are
 * empty or broken across lines), generic type arguments are gone, and a number of declarations
 * referenced further down (PATTERN_CACHEID_OLD, PATTERN_CACHEID, PATTERN_GUID_OLD, PATTERN_GUID,
 * PATTERN_SIZE_OLD, PATTERN_TERRAIN_OLD, PATTERN_OWNERREAL_OLD and the header of the test method
 * that prints the report) are not visible. It will not compile as-is; restore the original from
 * version control before changing any code or pattern.
 */
package cgeo.geocaching.test; import cgeo.geocaching.test.mock.GC1ZXX2; import cgeo.geocaching.test.mock.GC2CJPF; import cgeo.geocaching.test.mock.MockedCache; import cgeo.geocaching.utils.BaseUtils; import java.util.ArrayList; import java.util.List; import java.util.regex.Pattern; import junit.framework.TestCase; /** * Test class to compare the performance of two regular expressions on given data. * Can be used to improve the time needed to parse the cache data * Run As "JUnit Test" * * @author blafoo */ public class RegExPerformanceTest extends TestCase { // Regular expression: "" // Input string 1: "" // Input string 2: "" will handle a large, non-matching string almost a hundred times faster than the previous one! /** Search until the start of the next tag. The tag can follow immediately */ public static final String NEXT_START_TAG = "[^<]*"; /** Search until the end of the actual tag. The closing tag can follow immediately */ public static final String NEXT_END_TAG = "[^>]*"; /** Search until the start of the next tag. The tag must not follow immediately */ public static final String NEXT_START_TAG2 = "[^<]+"; /** Search until the end of the actual tag. The closing tag must not follow immediately */ public static final String NEXT_END_TAG2 = "[^>]+"; /** P tag */ public static final String TAG_P_START = "

"; /** Closing P tag **/ public static final String TAG_P_END = "

"; /** Search until the next <p> */ public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START; /** Search until the next </p> */ public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END; /** strong tag */ public static final String TAG_STRONG_START = ""; /** Closing strong tag */ public static final String TAG_STRONG_END = ""; /** Search until the next <strong> */ public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START; /** Search until the next </strong> */ public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END; /** div tag */ public static final String TAG_DIV_START = "
"; /** closing div tag */ public static final String TAG_DIV_END = "
"; /** Search until the next <div> */ public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START; /** Search until the next </div> */ public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END; public final static Pattern PATTERN_DESCRIPTION_OLD = Pattern.compile("]*>" + "(.*)[^<]*[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints", Pattern.CASE_INSENSITIVE); public final static Pattern PATTERN_DESCRIPTION = Pattern.compile("(.*?)[^<]*[^<]*

[^<]*

[^<]*

[^<]*\\W*Additional Hints"); public final static Pattern PATTERN_HINT_OLD = Pattern.compile("

]*>(.*?)
", Pattern.CASE_INSENSITIVE); public final static Pattern PATTERN_HINT = Pattern.compile("
]*>(.*?)
"); public final static Pattern PATTERN_SHORTDESC_OLD = Pattern.compile("
[^<]*]*>((?:(?![^\\w^<]*
).)*)[^\\w^<]*", Pattern.CASE_INSENSITIVE); public final static Pattern PATTERN_SHORTDESC = Pattern.compile("(.*?)[^\\w^<]*"); private final static Pattern PATTERN_GEOCODE_OLD = Pattern.compile("]*>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern PATTERN_GEOCODE = Pattern.compile("[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern PATTERN_SIZE = Pattern.compile("
[^<]*]*>[^S]*Size[^:]*:[^<]*]*>[^<]*\"Size:]*>[^<]*[^<]*[^<]*[^<]*

"); private final static Pattern PATTERN_LATLON_OLD = Pattern.compile("]*>([^<]*)<\\/span>", Pattern.CASE_INSENSITIVE); private final static Pattern PATTERN_LATLON = Pattern.compile("]*>(.*?)"); private final static Pattern PATTERN_LOCATION_OLD = Pattern.compile("]*>In ([^<]*)", Pattern.CASE_INSENSITIVE); private final static Pattern PATTERN_LOCATION = Pattern.compile("In (.*?)"); private final static Pattern PATTERN_PERSONALNOTE_OLD = Pattern.compile("

]*>([^<]*)

", Pattern.CASE_INSENSITIVE); private final static Pattern PATTERN_PERSONALNOTE = Pattern.compile("

]*>(.*?)

"); private final static Pattern PATTERN_NAME_OLD = Pattern.compile("]*>[^<]*([^<]+)<\\/span>[^<]*<\\/h2>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern PATTERN_NAME = Pattern.compile("(.*?)"); private final static Pattern PATTERN_DIFFICULTY_OLD = Pattern.compile("]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern PATTERN_DIFFICULTY = Pattern.compile("]*>[^<]*\"");]*>[^<]*\"[^\"]+\"[^]*>[^<]*", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); private final static Pattern PATTERN_TERRAIN = Pattern.compile("]*>[^<]*\"");[^<]+", Pattern.CASE_INSENSITIVE); private final static Pattern PATTERN_OWNERREAL = Pattern.compile(" output = doTheTests(10); for (String s : output) { System.out.println(s); } } /** Runs measure() once for every old/new pattern pair listed in the body and concatenates the report lines each call returns. @param iterations number of match repetitions handed to measure() for each pattern. @return all report lines, in the order the fields are measured. */ public static List doTheTests(final int iterations) { List output = new ArrayList(); output.addAll(measure(iterations, "hint", PATTERN_HINT_OLD, PATTERN_HINT)); output.addAll(measure(iterations, "description", PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION)); output.addAll(measure(iterations, "short description", PATTERN_SHORTDESC_OLD, PATTERN_SHORTDESC)); output.addAll(measure(iterations, "geocode", PATTERN_GEOCODE_OLD, PATTERN_GEOCODE)); output.addAll(measure(iterations, "cache id", PATTERN_CACHEID_OLD, PATTERN_CACHEID)); output.addAll(measure(iterations, "cache guid", PATTERN_GUID_OLD, PATTERN_GUID)); output.addAll(measure(iterations, "size", PATTERN_SIZE_OLD, PATTERN_SIZE)); output.addAll(measure(iterations, "latlon", PATTERN_LATLON_OLD, PATTERN_LATLON)); output.addAll(measure(iterations, "location", PATTERN_LOCATION_OLD, PATTERN_LOCATION)); output.addAll(measure(iterations, "personal note", PATTERN_PERSONALNOTE_OLD, PATTERN_PERSONALNOTE)); output.addAll(measure(iterations, "name", PATTERN_NAME_OLD, PATTERN_NAME)); output.addAll(measure(iterations, "difficulty", PATTERN_DIFFICULTY_OLD, PATTERN_DIFFICULTY)); output.addAll(measure(iterations, "terrain", PATTERN_TERRAIN_OLD, 
PATTERN_TERRAIN)); output.addAll(measure(iterations, "owner real", PATTERN_OWNERREAL_OLD, PATTERN_OWNERREAL)); return output; } /** Compares two patterns on the page data of the mocked caches GC2CJPF and GC1ZXX2. For each cache, group 1 of both patterns is extracted via BaseUtils.getMatch and the two results are asserted equal; then each pattern is timed with parse(). The last report line per cache is 100 times diff2 divided by diff1, formatted as a percentage. NOTE(review): that is a float division, so a diff1 of 0 ms yields Infinity or NaN instead of an exception. @param iterations repetitions per timing run. @param fieldName label written as the first report line. @param p1 first pattern (callers pass the old variant). @param p2 second pattern (callers pass the new variant). @return the report lines for this field. */ private static List measure(int iterations, String fieldName, Pattern p1, Pattern p2) { List output = new ArrayList(); output.add(fieldName + ":"); List cachesForParsing = new ArrayList(); cachesForParsing.add(new GC2CJPF()); cachesForParsing.add(new GC1ZXX2()); for (MockedCache cache : cachesForParsing) { String page = cache.getData(); String result1 = BaseUtils.getMatch(page, p1, 1, ""); String result2 = BaseUtils.getMatch(page, p2, 1, ""); assertEquals(result1, result2); long diff1, diff2; output.add("Parsing " + cache.getGeocode() + " " + cache.getName()); { diff1 = parse(page, p1, iterations); output.add("Time pattern 1:\t" + diff1 + " ms"); } { diff2 = parse(page, p2, iterations); output.add("Time pattern 2:\t" + diff2 + " ms"); } Float reduction = new Float((float) diff2 * 100 / (float) diff1); output.add("New runtime:\t" + String.format("%.1f", reduction) + "%\n"); } return output; } /** Times repeated matching: calls BaseUtils.getMatch on the page with the given pattern the requested number of times and discards the results. @param page text to match against. @param pattern pattern under test. @param iterations number of getMatch calls. @return elapsed time in milliseconds, measured with System.currentTimeMillis(). */ private static long parse(String page, Pattern pattern, int iterations) { long start = System.currentTimeMillis(); for (int j = 0; j < iterations; j++) { BaseUtils.getMatch(page, pattern, 1, ""); } return (System.currentTimeMillis() - start); } }