diff options
Diffstat (limited to 'java/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java')
-rw-r--r-- | java/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java | 259 |
1 files changed, 28 insertions, 231 deletions
diff --git a/java/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java b/java/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java index 324a63f..88df38d 100644 --- a/java/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java +++ b/java/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java @@ -31,9 +31,7 @@ package com.google.protobuf.nano; import java.io.IOException; -import java.nio.BufferOverflowException; -import java.nio.ByteBuffer; -import java.nio.ReadOnlyBufferException; +import java.io.UnsupportedEncodingException; /** * Encodes and writes protocol message fields. @@ -50,17 +48,15 @@ import java.nio.ReadOnlyBufferException; * @author kneton@google.com Kenton Varda */ public final class CodedOutputByteBufferNano { - /* max bytes per java UTF-16 char in UTF-8 */ - private static final int MAX_UTF8_EXPANSION = 3; - private final ByteBuffer buffer; + private final byte[] buffer; + private final int limit; + private int position; private CodedOutputByteBufferNano(final byte[] buffer, final int offset, final int length) { - this(ByteBuffer.wrap(buffer, offset, length)); - } - - private CodedOutputByteBufferNano(final ByteBuffer buffer) { this.buffer = buffer; + position = offset; + limit = offset + length; } /** @@ -292,203 +288,14 @@ public final class CodedOutputByteBufferNano { /** Write a {@code string} field to the stream. */ public void writeStringNoTag(final String value) throws IOException { - // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()), - // and at most 3 times of it. Optimize for the case where we know this length results in a - // constant varint length - saves measuring length of the string. - try { - final int minLengthVarIntSize = computeRawVarint32Size(value.length()); - final int maxLengthVarIntSize = computeRawVarint32Size(value.length() * MAX_UTF8_EXPANSION); - if (minLengthVarIntSize == maxLengthVarIntSize) { - int oldPosition = buffer.position(); - buffer.position(oldPosition + minLengthVarIntSize); - encode(value, buffer); - int newPosition = buffer.position(); - buffer.position(oldPosition); - writeRawVarint32(newPosition - oldPosition - minLengthVarIntSize); - buffer.position(newPosition); - } else { - writeRawVarint32(encodedLength(value)); - encode(value, buffer); - } - } catch (BufferOverflowException e) { - throw new OutOfSpaceException(buffer.position(), buffer.limit()); - } - } - - // These UTF-8 handling methods are copied from Guava's Utf8 class. - /** - * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string, - * this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in - * both time and space. - * - * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired - * surrogates) - */ - private static int encodedLength(CharSequence sequence) { - // Warning to maintainers: this implementation is highly optimized. - int utf16Length = sequence.length(); - int utf8Length = utf16Length; - int i = 0; - - // This loop optimizes for pure ASCII. - while (i < utf16Length && sequence.charAt(i) < 0x80) { - i++; - } - - // This loop optimizes for chars less than 0x800. - for (; i < utf16Length; i++) { - char c = sequence.charAt(i); - if (c < 0x800) { - utf8Length += ((0x7f - c) >>> 31); // branch free! - } else { - utf8Length += encodedLengthGeneral(sequence, i); - break; - } - } - - if (utf8Length < utf16Length) { - // Necessary and sufficient condition for overflow because of maximum 3x expansion - throw new IllegalArgumentException("UTF-8 length does not fit in int: " - + (utf8Length + (1L << 32))); - } - return utf8Length; - } - - private static int encodedLengthGeneral(CharSequence sequence, int start) { - int utf16Length = sequence.length(); - int utf8Length = 0; - for (int i = start; i < utf16Length; i++) { - char c = sequence.charAt(i); - if (c < 0x800) { - utf8Length += (0x7f - c) >>> 31; // branch free! - } else { - utf8Length += 2; - // jdk7+: if (Character.isSurrogate(c)) { - if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) { - // Check that we have a well-formed surrogate pair. - int cp = Character.codePointAt(sequence, i); - if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { - throw new IllegalArgumentException("Unpaired surrogate at index " + i); - } - i++; - } - } - } - return utf8Length; - } - - /** - * Encodes {@code sequence} into UTF-8, in {@code byteBuffer}. For a string, this method is - * equivalent to {@code buffer.put(string.getBytes(UTF_8))}, but is more efficient in both time - * and space. Bytes are written starting at the current position. This method requires paired - * surrogates, and therefore does not support chunking. - * - * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to - * compute the exact amount needed, or leave room for {@code 3 * sequence.length()}, which is the - * largest possible number of bytes that any input can be encoded to. - * - * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired - * surrogates) - * @throws BufferOverflowException if {@code sequence} encoded in UTF-8 does not fit in - * {@code byteBuffer}'s remaining space. - * @throws ReadOnlyBufferException if {@code byteBuffer} is a read-only buffer. - */ - private static void encode(CharSequence sequence, ByteBuffer byteBuffer) { - if (byteBuffer.isReadOnly()) { - throw new ReadOnlyBufferException(); - } else if (byteBuffer.hasArray()) { - try { - int encoded = encode(sequence, - byteBuffer.array(), - byteBuffer.arrayOffset() + byteBuffer.position(), - byteBuffer.remaining()); - byteBuffer.position(encoded - byteBuffer.arrayOffset()); - } catch (ArrayIndexOutOfBoundsException e) { - BufferOverflowException boe = new BufferOverflowException(); - boe.initCause(e); - throw boe; - } - } else { - encodeDirect(sequence, byteBuffer); - } - } - - private static void encodeDirect(CharSequence sequence, ByteBuffer byteBuffer) { - int utf16Length = sequence.length(); - for (int i = 0; i < utf16Length; i++) { - final char c = sequence.charAt(i); - if (c < 0x80) { // ASCII - byteBuffer.put((byte) c); - } else if (c < 0x800) { // 11 bits, two UTF-8 bytes - byteBuffer.put((byte) ((0xF << 6) | (c >>> 6))); - byteBuffer.put((byte) (0x80 | (0x3F & c))); - } else if (c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) { - // Maximium single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes - byteBuffer.put((byte) ((0xF << 5) | (c >>> 12))); - byteBuffer.put((byte) (0x80 | (0x3F & (c >>> 6)))); - byteBuffer.put((byte) (0x80 | (0x3F & c))); - } else { - final char low; - if (i + 1 == sequence.length() - || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { - throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1)); - } - int codePoint = Character.toCodePoint(c, low); - byteBuffer.put((byte) ((0xF << 4) | (codePoint >>> 18))); - byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 12)))); - byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 6)))); - byteBuffer.put((byte) (0x80 | (0x3F & codePoint))); - } - } - } - - private static int encode(CharSequence sequence, byte[] bytes, int offset, int length) { - int utf16Length = sequence.length(); - int j = offset; - int i = 0; - int limit = offset + length; - // Designed to take advantage of - // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination - for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) { - bytes[j + i] = (byte) c; - } - if (i == utf16Length) { - return j + utf16Length; - } - j += i; - for (char c; i < utf16Length; i++) { - c = sequence.charAt(i); - if (c < 0x80 && j < limit) { - bytes[j++] = (byte) c; - } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes - bytes[j++] = (byte) ((0xF << 6) | (c >>> 6)); - bytes[j++] = (byte) (0x80 | (0x3F & c)); - } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) { - // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes - bytes[j++] = (byte) ((0xF << 5) | (c >>> 12)); - bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6))); - bytes[j++] = (byte) (0x80 | (0x3F & c)); - } else if (j <= limit - 4) { - // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes - final char low; - if (i + 1 == sequence.length() - || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { - throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1)); - } - int codePoint = Character.toCodePoint(c, low); - bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18)); - bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12))); - bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6))); - bytes[j++] = (byte) (0x80 | (0x3F & codePoint)); - } else { - throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j); - } - } - return j; + // Unfortunately there does not appear to be any way to tell Java to encode + // UTF-8 directly into our buffer, so we have to let it create its own byte + // array and then copy. + final byte[] bytes = value.getBytes("UTF-8"); + writeRawVarint32(bytes.length); + writeRawBytes(bytes); } - // End guava UTF-8 methods - /** Write a {@code group} field to the stream. */ public void writeGroupNoTag(final MessageNano value) throws IOException { value.writeTo(this); @@ -796,8 +603,13 @@ public final class CodedOutputByteBufferNano { * {@code string} field. */ public static int computeStringSizeNoTag(final String value) { - final int length = encodedLength(value); - return computeRawVarint32Size(length) + length; + try { + final byte[] bytes = value.getBytes("UTF-8"); + return computeRawVarint32Size(bytes.length) + + bytes.length; + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("UTF-8 not supported."); + } } /** @@ -880,7 +692,7 @@ public final class CodedOutputByteBufferNano { * Otherwise, throws {@code UnsupportedOperationException}. */ public int spaceLeft() { - return buffer.remaining(); + return limit - position; } /** @@ -898,23 +710,6 @@ public final class CodedOutputByteBufferNano { } /** - * Returns the position within the internal buffer. - */ - public int position() { - return buffer.position(); - } - - /** - * Resets the position within the internal buffer to zero. - * - * @see #position - * @see #spaceLeft - */ - public void reset() { - buffer.clear(); - } - - /** * If you create a CodedOutputStream around a simple flat array, you must * not attempt to write more bytes than the array has space. Otherwise, * this exception will be thrown. @@ -930,12 +725,12 @@ public final class CodedOutputByteBufferNano { /** Write a single byte. */ public void writeRawByte(final byte value) throws IOException { - if (!buffer.hasRemaining()) { + if (position == limit) { // We're writing to a single buffer. - throw new OutOfSpaceException(buffer.position(), buffer.limit()); + throw new OutOfSpaceException(position, limit); } - buffer.put(value); + buffer[position++] = value; } /** Write a single byte, represented by an integer value. */ @@ -951,11 +746,13 @@ public final class CodedOutputByteBufferNano { /** Write part of an array of bytes. */ public void writeRawBytes(final byte[] value, int offset, int length) throws IOException { - if (buffer.remaining() >= length) { - buffer.put(value, offset, length); + if (limit - position >= length) { + // We have room in the current buffer. + System.arraycopy(value, offset, buffer, position, length); + position += length; } else { // We're writing to a single buffer. - throw new OutOfSpaceException(buffer.position(), buffer.limit()); + throw new OutOfSpaceException(position, limit); } } |