1 files changed, 0 insertions, 421 deletions
diff --git a/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
deleted file mode 100644
index f41595e..0000000
--- a/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
+++ /dev/null
@@ -1,421 +0,0 @@
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc.  All rights reserved.
-// https://developers.google.com/protocol-buffers/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-package com.google.protobuf;
-
-import static junit.framework.Assert.*;
-
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Random;
-import java.util.logging.Logger;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.Charset;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
-
-/**
- * Shared testing code for {@link IsValidUtf8Test} and
- * {@link IsValidUtf8FourByteTest}.
- *
- * @author jonp@google.com (Jon Perlow)
- * @author martinrb@google.com (Martin Buchholz)
- */
-class IsValidUtf8TestUtil {
-  private static Logger logger = Logger.getLogger(
-      IsValidUtf8TestUtil.class.getName());
-
-  // 128 - [chars 0x0000 to 0x007f]
-  static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
-
-  // 128
-  static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
-      ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
-
-  // 1920 [chars 0x0080 to 0x07FF]
-  static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
-
-  // 18,304
-  static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
-      // Both bytes are one byte characters
-      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
-      // The possible number of two byte characters
-      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
-
-  // 2048
-  static long THREE_BYTE_SURROGATES = 2 * 1024;
-
-  // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
-  static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
-      0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
-
-  // 2,650,112
-  static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
-      // All one byte characters
-      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
-      // One two byte character and a one byte character
-      2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
-       // Three byte characters
-      THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
-
-  // 1,048,576 [chars 0x10000L to 0x10FFFF]
-  static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
-
-  // 289,571,839
-  static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
-      // All one byte characters
-      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
-      // One and three byte characters
-      2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
-      // Two two byte characters
-      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
-      // Permutations of one and two byte characters
-      3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
-      // Four byte characters
-      FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
-
-  static class Shard {
-    final long index;
-    final long start;
-    final long lim;
-    final long expected;
-
-
-    public Shard(long index, long start, long lim, long expected) {
-      assertTrue(start < lim);
-      this.index = index;
-      this.start = start;
-      this.lim = lim;
-      this.expected = expected;
-    }
-  }
-
-  static final long[] FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES =
-      generateFourByteShardsExpectedRunnables();
-
-  private static long[] generateFourByteShardsExpectedRunnables() {
-    long[] expected = new long[128];
-
-    // 0-63 are all 5300224
-    for (int i = 0; i <= 63; i++) {
-      expected[i] = 5300224;
-    }
-
-    // 97-111 are all 2342912
-    for (int i = 97; i <= 111; i++) {
-     expected[i] = 2342912;
-    }
-
-    // 113-117 are all 1048576
-    for (int i = 113; i <= 117; i++) {
-      expected[i] = 1048576;
-    }
-
-    // One offs
-    expected[112] = 786432;
-    expected[118] = 786432;
-    expected[119] = 1048576;
-    expected[120] = 458752;
-    expected[121] = 524288;
-    expected[122] = 65536;
-
-    // Anything not assigned was the default 0.
-    return expected;
-  }
-
-  static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(
-      128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
-
-
-  private static List<Shard> generateFourByteShards(
-      int numShards, long[] expected) {
-    assertEquals(numShards, expected.length);
-    List<Shard> shards = new ArrayList<Shard>(numShards);
-    long LIM = 1L << 32;
-    long increment = LIM / numShards;
-    assertTrue(LIM % numShards == 0);
-    for (int i = 0; i < numShards; i++) {
-      shards.add(new Shard(i,
-          increment * i,
-          increment * (i + 1),
-          expected[i]));
-    }
-    return shards;
-  }
-
-  /**
-   * Helper to run the loop to test all the permutations for the number of bytes
-   * specified.
-   *
-   * @param numBytes the number of bytes in the byte array
-   * @param expectedCount the expected number of roundtrippable permutations
-   */
-  static void testBytes(int numBytes, long expectedCount)
-      throws UnsupportedEncodingException {
-    testBytes(numBytes, expectedCount, 0, -1);
-  }
-
-  /**
-   * Helper to run the loop to test all the permutations for the number of bytes
-   * specified. This overload is useful for debugging to get the loop to start
-   * at a certain character.
-   *
-   * @param numBytes the number of bytes in the byte array
-   * @param expectedCount the expected number of roundtrippable permutations
-   * @param start the starting bytes encoded as a long as big-endian
-   * @param lim the limit of bytes to process encoded as a long as big-endian,
-   *     or -1 to mean the max limit for numBytes
-   */
-  static void testBytes(int numBytes, long expectedCount, long start, long lim)
-      throws UnsupportedEncodingException {
-    Random rnd = new Random();
-    byte[] bytes = new byte[numBytes];
-
-    if (lim == -1) {
-      lim = 1L << (numBytes * 8);
-    }
-    long count = 0;
-    long countRoundTripped = 0;
-    for (long byteChar = start; byteChar < lim; byteChar++) {
-      long tmpByteChar = byteChar;
-      for (int i = 0; i < numBytes; i++) {
-        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
-        tmpByteChar = tmpByteChar >> 8;
-      }
-      ByteString bs = ByteString.copyFrom(bytes);
-      boolean isRoundTrippable = bs.isValidUtf8();
-      String s = new String(bytes, "UTF-8");
-      byte[] bytesReencoded = s.getBytes("UTF-8");
-      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);
-
-      if (bytesEqual != isRoundTrippable) {
-        outputFailure(byteChar, bytes, bytesReencoded);
-      }
-
-      // Check agreement with static Utf8 methods.
-      assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes));
-      assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes, 0, numBytes));
-
-      // Test partial sequences.
-      // Partition numBytes into three segments (not necessarily non-empty).
-      int i = rnd.nextInt(numBytes);
-      int j = rnd.nextInt(numBytes);
-      if (j < i) {
-        int tmp = i; i = j; j = tmp;
-      }
-      int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);
-      int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);
-      int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);
-      if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {
-        System.out.printf("state=%04x %04x %04x i=%d j=%d%n",
-                          state1, state2, state3, i, j);
-        outputFailure(byteChar, bytes, bytesReencoded);
-      }
-      assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));
-
-      // Test ropes built out of small partial sequences
-      ByteString rope = RopeByteString.newInstanceForTest(
-          bs.substring(0, i),
-          RopeByteString.newInstanceForTest(
-              bs.substring(i, j),
-              bs.substring(j, numBytes)));
-      assertSame(RopeByteString.class, rope.getClass());
-
-      ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };
-      for (ByteString x : byteStrings) {
-        assertEquals(isRoundTrippable,
-                     x.isValidUtf8());
-        assertEquals(state3,
-                     x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
-
-        assertEquals(state1,
-                     x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
-        assertEquals(state1,
-                     x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
-        assertEquals(state2,
-                     x.partialIsValidUtf8(state1, i, j - i));
-        assertEquals(state2,
-                     x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
-        assertEquals(state3,
-                     x.partialIsValidUtf8(state2, j, numBytes - j));
-        assertEquals(state3,
-                     x.substring(j, numBytes)
-                     .partialIsValidUtf8(state2, 0, numBytes - j));
-      }
-
-      // ByteString reduplication should not affect its UTF-8 validity.
-      ByteString ropeADope =
-          RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
-      assertEquals(isRoundTrippable, ropeADope.isValidUtf8());
-
-      if (isRoundTrippable) {
-        countRoundTripped++;
-      }
-      count++;
-      if (byteChar != 0 && byteChar % 1000000L == 0) {
-        logger.info("Processed " + (byteChar / 1000000L) +
-            " million characters");
-      }
-    }
-    logger.info("Round tripped " + countRoundTripped + " of " + count);
-    assertEquals(expectedCount, countRoundTripped);
-  }
-
-  /**
-   * Variation of {@link #testBytes} that does less allocation using the
-   * low-level encoders/decoders directly. Checked in because it's useful for
-   * debugging when trying to process bytes faster, but since it doesn't use the
-   * actual String class, it's possible for incompatibilities to develop
-   * (although unlikely).
-   *
-   * @param numBytes the number of bytes in the byte array
-   * @param expectedCount the expected number of roundtrippable permutations
-   * @param start the starting bytes encoded as a long as big-endian
-   * @param lim the limit of bytes to process encoded as a long as big-endian,
-   *     or -1 to mean the max limit for numBytes
-   */
-  void testBytesUsingByteBuffers(
-      int numBytes, long expectedCount, long start, long lim)
-      throws UnsupportedEncodingException {
-    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
-        .onMalformedInput(CodingErrorAction.REPLACE)
-        .onUnmappableCharacter(CodingErrorAction.REPLACE);
-    CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder()
-        .onMalformedInput(CodingErrorAction.REPLACE)
-        .onUnmappableCharacter(CodingErrorAction.REPLACE);
-    byte[] bytes = new byte[numBytes];
-    int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
-    char[] charsDecoded =
-        new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
-    int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;
-    byte[] bytesReencoded = new byte[maxBytes];
-
-    ByteBuffer bb = ByteBuffer.wrap(bytes);
-    CharBuffer cb = CharBuffer.wrap(charsDecoded);
-    ByteBuffer bbReencoded = ByteBuffer.wrap(bytesReencoded);
-    if (lim == -1) {
-      lim = 1L << (numBytes * 8);
-    }
-    long count = 0;
-    long countRoundTripped = 0;
-    for (long byteChar = start; byteChar < lim; byteChar++) {
-      bb.rewind();
-      bb.limit(bytes.length);
-      cb.rewind();
-      cb.limit(charsDecoded.length);
-      bbReencoded.rewind();
-      bbReencoded.limit(bytesReencoded.length);
-      encoder.reset();
-      decoder.reset();
-      long tmpByteChar = byteChar;
-      for (int i = 0; i < bytes.length; i++) {
-        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
-        tmpByteChar = tmpByteChar >> 8;
-      }
-      boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();
-      CoderResult result = decoder.decode(bb, cb, true);
-      assertFalse(result.isError());
-      result = decoder.flush(cb);
-      assertFalse(result.isError());
-
-      int charLen = cb.position();
-      cb.rewind();
-      cb.limit(charLen);
-      result = encoder.encode(cb, bbReencoded, true);
-      assertFalse(result.isError());
-      result = encoder.flush(bbReencoded);
-      assertFalse(result.isError());
-
-      boolean bytesEqual = true;
-      int bytesLen = bbReencoded.position();
-      if (bytesLen != numBytes) {
-        bytesEqual = false;
-      } else {
-        for (int i = 0; i < numBytes; i++) {
-          if (bytes[i] != bytesReencoded[i]) {
-            bytesEqual = false;
-            break;
-          }
-        }
-      }
-      if (bytesEqual != isRoundTrippable) {
-        outputFailure(byteChar, bytes, bytesReencoded, bytesLen);
-      }
-
-      count++;
-      if (isRoundTrippable) {
-        countRoundTripped++;
-      }
-      if (byteChar != 0 && byteChar % 1000000 == 0) {
-        logger.info("Processed " + (byteChar / 1000000) +
-            " million characters");
-      }
-    }
-    logger.info("Round tripped " + countRoundTripped + " of " + count);
-    assertEquals(expectedCount, countRoundTripped);
-  }
-
-  private static void outputFailure(long byteChar, byte[] bytes, byte[] after) {
-    outputFailure(byteChar, bytes, after, after.length);
-  }
-
-  private static void outputFailure(long byteChar, byte[] bytes, byte[] after,
-      int len) {
-    fail("Failure: (" + Long.toHexString(byteChar) + ") " +
-        toHexString(bytes) + " => " + toHexString(after, len));
-  }
-
-  private static String toHexString(byte[] b) {
-    return toHexString(b, b.length);
-  }
-
-  private static String toHexString(byte[] b, int len) {
-    StringBuilder s = new StringBuilder();
-    s.append("\"");
-    for (int i = 0; i < len; i++) {
-      if (i > 0) {
-        s.append(" ");
-      }
-      s.append(String.format("%02x", b[i] & 0xFF));
-    }
-    s.append("\"");
-    return s.toString();
-  }
-
-}