aboutsummaryrefslogtreecommitdiffstats
path: root/tests/src/cgeo/geocaching/test/RegExPerformanceTest.java
blob: 7c3b1953dfd996ee3c07a77a90dc5f22eb9fdbee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package cgeo.geocaching.test;

import cgeo.geocaching.test.mock.GC1ZXX2;
import cgeo.geocaching.test.mock.GC2CJPF;
import cgeo.geocaching.test.mock.GC2JVEH;
import cgeo.geocaching.test.mock.GC3XX5J;
import cgeo.geocaching.test.mock.MockedCache;
import cgeo.geocaching.utils.TextUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import junit.framework.TestCase;

/**
 * Test class to compare the performance of two regular expressions on given data.
 * Can be used to improve the time needed to parse the cache data.
 * <p>
 * Both patterns are first checked to extract the same text from every mocked cache page,
 * then each one is applied repeatedly and the elapsed times are reported side by side.
 * <p>
 * Run As "JUnit Test"
 */
public class RegExPerformanceTest extends TestCase {

    // Background: why a greedy ".*" can be very slow.
    //
    // Regular expression: "<img.*src=(\S*)/>"
    // Input string 1: "<img border=1 src=image.jpg />"
    // Input string 2: "<img src=src=src=src= .... many src= ... src=src="
    //
    // Instead of "a(.*)a", it's much better to use "a([^a]*)a".
    // The rewritten expression "<img((?!src=).)*src=(\S*)/>" will handle a large,
    // non-matching string almost a hundred times faster than the previous one!

    /** Search until the start of the next tag. The tag can follow immediately */
    public static final String NEXT_START_TAG = "[^<]*";
    /** Search until the end of the current tag. The closing bracket can follow immediately */
    public static final String NEXT_END_TAG = "[^>]*";

    /** Search until the start of the next tag. The tag must not follow immediately */
    public static final String NEXT_START_TAG2 = "[^<]+";
    /** Search until the end of the current tag. The closing bracket must not follow immediately */
    public static final String NEXT_END_TAG2 = "[^>]+";

    /** P tag */
    public static final String TAG_P_START = "<p>";
    /** Closing P tag */
    public static final String TAG_P_END = "</p>";
    /** Search until the next &lt;p&gt; */
    public static final String TAG_P_START_NEXT = NEXT_START_TAG + TAG_P_START;
    /** Search until the next &lt;/p&gt; */
    public static final String TAG_P_END_NEXT = NEXT_START_TAG + TAG_P_END;

    /** strong tag */
    public static final String TAG_STRONG_START = "<strong>";
    /** Closing strong tag */
    public static final String TAG_STRONG_END = "</strong>";
    /** Search until the next &lt;strong&gt; */
    public static final String TAG_STRONG_START_NEXT = NEXT_START_TAG + TAG_STRONG_START;
    /** Search until the next &lt;/strong&gt; */
    public static final String TAG_STRONG_END_NEXT = NEXT_START_TAG + TAG_STRONG_END;

    /** div tag */
    public static final String TAG_DIV_START = "<div>";
    /** closing div tag */
    public static final String TAG_DIV_END = "</div>";
    /** Search until the next &lt;div&gt; */
    public static final String TAG_DIV_START_NEXT = NEXT_START_TAG + TAG_DIV_START;
    /** Search until the next &lt;/div&gt; */
    public static final String TAG_DIV_END_NEXT = NEXT_START_TAG + TAG_DIV_END;

    /** Baseline pattern for the long description (greedy capture, case insensitive). */
    public static final Pattern PATTERN_DESCRIPTION_OLD = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\"[^>]*>" + "(.*)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>", Pattern.CASE_INSENSITIVE);
    /** Candidate pattern for the long description (reluctant capture, case sensitive). */
    public static final Pattern PATTERN_DESCRIPTION = Pattern.compile("<span id=\"ctl00_ContentBody_LongDescription\">(.*?)</span>[^<]*</div>[^<]*<p>[^<]*</p>[^<]*<p>[^<]*<strong>\\W*Additional Hints</strong>");

    /** Cache pages every pattern pair is measured against. */
    public static final List<MockedCache> MOCKED_CACHES;
    static {
        MOCKED_CACHES = Arrays.asList(new GC2CJPF(), new GC1ZXX2(), new GC2JVEH(), new GC3XX5J());
    }

    /** Number of nanoseconds in one millisecond, used to report times in ms. */
    private static final long NANOS_PER_MILLI = 1000000L;

    /**
     * Runs the comparison with 10 iterations per pattern and prints the report to standard out.
     */
    public static void testRegEx() {
        final List<String> output = doTheTests(10);

        for (String s : output) {
            System.out.println(s);
        }
    }

    /**
     * Runs all pattern comparisons.
     *
     * @param iterations
     *            how often each pattern is applied to each page while being timed
     * @return the report, one entry per output line
     */
    public static List<String> doTheTests(final int iterations) {

        final List<String> output = new ArrayList<String>();

        output.addAll(measure(iterations, "description", PATTERN_DESCRIPTION_OLD, PATTERN_DESCRIPTION));

        return output;
    }

    /**
     * Compares two patterns on every page in {@link #MOCKED_CACHES}.
     * Fails the test if the two patterns do not extract the same text from a page.
     *
     * @param iterations
     *            how often each pattern is applied to each page while being timed
     * @param fieldName
     *            label written as the first report line
     * @param p1
     *            baseline pattern
     * @param p2
     *            candidate pattern
     * @return the report lines for this field
     */
    private static List<String> measure(int iterations, String fieldName, Pattern p1, Pattern p2) {

        final List<String> output = new ArrayList<String>();
        output.add(fieldName + ":");

        for (MockedCache cache : MOCKED_CACHES) {
            final String page = cache.getData();

            // A timing comparison is only meaningful if both patterns yield the same result.
            final String result1 = TextUtils.getMatch(page, p1, true, "");
            final String result2 = TextUtils.getMatch(page, p2, true, "");
            assertEquals(result1, result2);

            output.add("Parsing " + cache.getGeocode() + " " + cache.getName());

            final long nanos1 = parse(page, p1, iterations);
            output.add("Time pattern 1:\t" + (nanos1 / NANOS_PER_MILLI) + " ms");

            final long nanos2 = parse(page, p2, iterations);
            output.add("Time pattern 2:\t" + (nanos2 / NANOS_PER_MILLI) + " ms");

            if (nanos1 > 0) {
                // Ratio is taken from the nanosecond values so that runs shorter than
                // one millisecond still give a usable percentage.
                final double reduction = (double) nanos2 * 100 / nanos1;
                output.add("New runtime:\t" + String.format("%.1f", reduction) + "%\n");
            } else {
                // Dividing by a zero baseline would print "NaN%" or "Infinity%".
                output.add("New runtime:\tn/a\n");
            }
        }

        return output;

    }

    /**
     * Applies a pattern repeatedly to a page and measures the elapsed time.
     *
     * @param page
     *            the text to match against
     * @param pattern
     *            the pattern to apply
     * @param iterations
     *            how often the pattern is applied
     * @return the elapsed time in nanoseconds
     */
    private static long parse(String page, Pattern pattern, int iterations) {
        // nanoTime is monotonic and meant for measuring elapsed time;
        // currentTimeMillis is wall-clock time and may jump.
        final long start = System.nanoTime();
        for (int j = 0; j < iterations; j++) {
            TextUtils.getMatch(page, pattern, true, "");
        }
        return System.nanoTime() - start;

    }

}