/**
 *
 */
package cgeo.geocaching.utils;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import org.eclipse.jdt.annotation.Nullable;

import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;

/**
 * Misc. text utilities. None of the methods use Android specific classes, so they can be exercised in plain JUnit
 * tests.
 */
public final class TextUtils {

    public static final Charset CHARSET_UTF8 = Charset.forName("UTF-8");
    public static final Charset CHARSET_ASCII = Charset.forName("US-ASCII");

    /** Matches every single control character; used to replace them with spaces. */
    private static final Pattern PATTERN_REMOVE_NONPRINTABLE = Pattern.compile("\\p{Cntrl}");

    private TextUtils() {
        // utility class
    }

    /**
     * Searches for the pattern p in the data. If the pattern is not found defaultValue is returned.
     * Control characters inside the found group are replaced by spaces.
     *
     * @param data
     *            Data to search in, may be {@code null}
     * @param p
     *            Pattern to search for
     * @param trim
     *            Set to true if the group found should be trim'ed
     * @param group
     *            Number of the group to return if found
     * @param defaultValue
     *            Value to return if data is {@code null}, the pattern is not found or the requested group did not
     *            take part in the match
     * @param last
     *            Set to true to return the group of the last occurrence instead of the first one
     * @return defaultValue or the n-th group if the pattern matches (trimmed if wanted)
     */
    @SuppressFBWarnings("DM_STRING_CTOR")
    public static String getMatch(@Nullable final String data, final Pattern p, final boolean trim, final int group, final String defaultValue, final boolean last) {
        if (data != null) {
            String result = null;
            final Matcher matcher = p.matcher(data);

            if (last) {
                // walk over all occurrences, only the group of the final one is kept
                while (matcher.find()) {
                    result = matcher.group(group);
                }
            } else if (matcher.find()) {
                result = matcher.group(group);
            }

            if (null != result) {
                final Matcher remover = PATTERN_REMOVE_NONPRINTABLE.matcher(result);
                result = remover.replaceAll(" ");

                // Java copies the whole page String, when matching with regular expressions
                // later this would block the garbage collector, as we only need tiny parts of the page
                // see http://developer.android.com/reference/java/lang/String.html#backing_array
                // Thus the creating of a new String via String constructor is necessary here!!
                // And BTW: You cannot even see that effect in the debugger, but must use a separate memory profiler!
                return trim ? new String(result).trim() : new String(result);
            }
        }
        return defaultValue;
    }

    /**
     * Searches for the pattern p in the data. If the pattern is not found defaultValue is returned.
     *
     * @param data
     *            Data to search in
     * @param p
     *            Pattern to search for
     * @param trim
     *            Set to true if the group found should be trim'ed
     * @param defaultValue
     *            Value to return if the pattern is not found
     * @return defaultValue or the first group of the first occurrence if the pattern matches (trimmed if wanted)
     */
    public static String getMatch(final String data, final Pattern p, final boolean trim, final String defaultValue) {
        return TextUtils.getMatch(data, p, trim, 1, defaultValue, false);
    }

    /**
     * Searches for the pattern p in the data. If the pattern is not found defaultValue is returned.
     *
     * @param data
     *            Data to search in, may be {@code null}
     * @param p
     *            Pattern to search for
     * @param defaultValue
     *            Value to return if the pattern is not found
     * @return defaultValue or the first group of the first occurrence if the pattern matches (trimmed)
     */
    public static String getMatch(@Nullable final String data, final Pattern p, final String defaultValue) {
        return TextUtils.getMatch(data, p, true, 1, defaultValue, false);
    }

    /**
     * Searches for the pattern p in the data.
     *
     * @param data
     *            Data to search in, {@code null} never matches
     * @param p
     *            Pattern to search for
     * @return true if data contains the pattern p
     */
    public static boolean matches(final String data, final Pattern p) {
        if (data == null) {
            return false;
        }
        // matcher is faster than String.contains() and more flexible - it takes patterns instead of fixed texts
        return p.matcher(data).find();
    }

    /**
     * Replaces every \n, \r and \t with a single space. Afterwards multiple spaces
     * are merged into a single space. Finally leading spaces are deleted.
     * A trailing run of whitespace is not removed but collapsed into one space.
     *
     * This method must be fast, but may not lead to the shortest replacement String.
     *
     * You are only allowed to change this code if you can prove it became faster on a device.
     * see cgeo.geocaching.test.WhiteSpaceTest#replaceWhitespaceManually in the test project.
     *
     * @param data
     *            complete HTML page
     * @return the HTML page as a very long single "line"
     */
    public static String replaceWhitespace(final String data) {
        final int length = data.length();
        final char[] chars = new char[length];
        data.getChars(0, length, chars, 0);
        int resultSize = 0;
        // starting with true drops all whitespace at the very beginning of the data
        boolean lastWasWhitespace = true;
        for (final char c : chars) {
            if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
                if (!lastWasWhitespace) {
                    chars[resultSize++] = ' ';
                }
                lastWasWhitespace = true;
            } else {
                chars[resultSize++] = c;
                lastWasWhitespace = false;
            }
        }
        return String.valueOf(chars, 0, resultSize);
    }

    /**
     * Quick and naive check for possible rich HTML content in a string.
     *
     * @param str
     *            A string containing HTML code.
     * @return true if str contains HTML code that needs to go through a HTML renderer before
     *         being displayed, false if it can be displayed as-is without any loss
     */
    public static boolean containsHtml(final String str) {
        return str.indexOf('<') != -1 || str.indexOf('&') != -1;
    }

    /**
     * Remove all control characters (which are not valid in XML or HTML), as those should not appear in cache texts
     * anyway. Every control character is replaced by a space and the result is trimmed afterwards.
     *
     * @param input
     *            Text to clean
     * @return the trimmed text without control characters
     */
    public static String removeControlCharacters(final String input) {
        final Matcher remover = PATTERN_REMOVE_NONPRINTABLE.matcher(input);
        return remover.replaceAll(" ").trim();
    }

    /**
     * Calculate a simple checksum for change-checking (not usable for security/cryptography!)
     *
     * @param input
     *            String to check
     * @return resulting CRC32 checksum of the UTF-8 encoded input
     */
    public static long checksum(final String input) {
        final CRC32 checksum = new CRC32();
        // explicit charset: the platform default would make the checksum depend on the runtime environment
        checksum.update(input.getBytes(CHARSET_UTF8));
        return checksum.getValue();
    }
}