Skip to content

Commit 95c8fb6

Browse files
committed
Avoid wasting 64K per empty buffer: start with an empty slab and allocate the first real slab lazily on the first write.
1 parent 23db4eb commit 95c8fb6

File tree

1 file changed

+24
-19
lines changed

1 file changed

+24
-19
lines changed

parquet-encoding/src/main/java/parquet/bytes/CapacityByteArrayOutputStream.java

Lines changed: 24 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,10 @@
1515
*/
1616
package parquet.bytes;
1717

18+
import static java.lang.Math.max;
19+
import static java.lang.String.format;
20+
import static java.lang.System.arraycopy;
21+
1822
import java.io.ByteArrayOutputStream;
1923
import java.io.IOException;
2024
import java.io.OutputStream;
@@ -40,6 +44,7 @@ public class CapacityByteArrayOutputStream extends OutputStream {
4044

4145
private static final int MINIMUM_SLAB_SIZE = 64 * 1024;
4246
private static final int EXPONENTIAL_SLAB_SIZE_THRESHOLD = 10;
47+
private static final byte[] EMPTY_SLAB = new byte[0];
4348

4449
private int slabSize;
4550
private List<byte[]> slabs = new ArrayList<byte[]>();
@@ -61,24 +66,24 @@ private void initSlabs(int initialSize) {
6166
if (Log.DEBUG) LOG.debug(String.format("initial slab of size %d", initialSize));
6267
this.slabSize = initialSize;
6368
this.slabs.clear();
64-
this.capacity = initialSize;
65-
this.currentSlab = new byte[slabSize];
66-
this.slabs.add(currentSlab);
67-
this.currentSlabIndex = 0;
69+
this.capacity = 0;
70+
this.currentSlab = EMPTY_SLAB;
71+
this.currentSlabIndex = -1;
6872
this.currentSlabPosition = 0;
6973
this.size = 0;
7074
}
7175

7276
private void addSlab(int minimumSize) {
77+
minimumSize = max(minimumSize, MINIMUM_SLAB_SIZE);
7378
this.currentSlabIndex += 1;
7479
if (currentSlabIndex < this.slabs.size()) {
7580
// reuse existing slab
7681
this.currentSlab = this.slabs.get(currentSlabIndex);
77-
if (Log.DEBUG) LOG.debug(String.format("reusing slab of size %d", currentSlab.length));
82+
if (Log.DEBUG) LOG.debug(format("reusing slab of size %d", currentSlab.length));
7883
if (currentSlab.length < minimumSize) {
79-
if (Log.DEBUG) LOG.debug(String.format("slab size %,d too small for value of size %,d. replacing slab", currentSlab.length, minimumSize));
84+
if (Log.DEBUG) LOG.debug(format("slab size %,d too small for value of size %,d. replacing slab", currentSlab.length, minimumSize));
8085
byte[] newSlab = new byte[minimumSize];
81-
capacity += minimumSize - currentSlab.length;
86+
capacity += newSlab.length - currentSlab.length;
8287
this.currentSlab = newSlab;
8388
this.slabs.set(currentSlabIndex, newSlab);
8489
}
@@ -87,13 +92,13 @@ private void addSlab(int minimumSize) {
8792
// make slabs bigger in case we are creating too many of them
8893
// double slab size every time.
8994
this.slabSize = size;
90-
if (Log.DEBUG) LOG.debug(String.format("used %d slabs, new slab size %d", currentSlabIndex, slabSize));
95+
if (Log.DEBUG) LOG.debug(format("used %d slabs, new slab size %d", currentSlabIndex, slabSize));
9196
}
9297
if (slabSize < minimumSize) {
93-
if (Log.DEBUG) LOG.debug(String.format("slab size %,d too small for value of size %,d. Bumping up slab size", slabSize, minimumSize));
98+
if (Log.DEBUG) LOG.debug(format("slab size %,d too small for value of size %,d. Bumping up slab size", slabSize, minimumSize));
9499
this.slabSize = minimumSize;
95100
}
96-
if (Log.DEBUG) LOG.debug(String.format("new slab of size %d", slabSize));
101+
if (Log.DEBUG) LOG.debug(format("new slab of size %d", slabSize));
97102
this.currentSlab = new byte[slabSize];
98103
this.slabs.add(currentSlab);
99104
this.capacity += slabSize;
@@ -119,13 +124,13 @@ public void write(byte b[], int off, int len) {
119124
}
120125
if (currentSlabPosition + len >= currentSlab.length) {
121126
final int length1 = currentSlab.length - currentSlabPosition;
122-
System.arraycopy(b, off, currentSlab, currentSlabPosition, length1);
127+
arraycopy(b, off, currentSlab, currentSlabPosition, length1);
123128
final int length2 = len - length1;
124129
addSlab(length2);
125-
System.arraycopy(b, off + length1, currentSlab, currentSlabPosition, length2);
130+
arraycopy(b, off + length1, currentSlab, currentSlabPosition, length2);
126131
currentSlabPosition = length2;
127132
} else {
128-
System.arraycopy(b, off, currentSlab, currentSlabPosition, len);
133+
arraycopy(b, off, currentSlab, currentSlabPosition, len);
129134
currentSlabPosition += len;
130135
}
131136
size += len;
@@ -174,8 +179,8 @@ public void reset() {
174179
(currentSlabIndex > EXPONENTIAL_SLAB_SIZE_THRESHOLD)
175180
){
176181
// readjust slab size
177-
initSlabs(Math.max(size / 5, MINIMUM_SLAB_SIZE)); // should make overhead to about 20% without incurring many slabs
178-
if (Log.DEBUG) LOG.debug(String.format("used %d slabs, new slab size %d", currentSlabIndex + 1, slabSize));
182+
initSlabs(max(size / 5, MINIMUM_SLAB_SIZE)); // should make overhead to about 20% without incurring many slabs
183+
if (Log.DEBUG) LOG.debug(format("used %d slabs, new slab size %d", currentSlabIndex + 1, slabSize));
179184
} else if (currentSlabIndex < slabs.size() - 1) {
180185
// free up the slabs that we are not using. We want to minimize overhead
181186
this.slabs = new ArrayList<byte[]>(slabs.subList(0, currentSlabIndex + 1));
@@ -184,9 +189,9 @@ public void reset() {
184189
capacity += slab.length;
185190
}
186191
}
187-
this.currentSlabIndex = 0;
192+
this.currentSlabIndex = -1;
188193
this.currentSlabPosition = 0;
189-
this.currentSlab = slabs.get(currentSlabIndex);
194+
this.currentSlab = EMPTY_SLAB;
190195
this.size = 0;
191196
}
192197

@@ -217,7 +222,7 @@ public void setByte(long index, byte value) {
217222
"Index: " + index + " is >= the current size of: " + size);
218223

219224
long seen = 0;
220-
for (int i = 0; i <=currentSlabIndex; i++) {
225+
for (int i = 0; i <= currentSlabIndex; i++) {
221226
byte[] slab = slabs.get(i);
222227
if (index < seen + slab.length) {
223228
// ok found index
@@ -233,7 +238,7 @@ public void setByte(long index, byte value) {
233238
* @return a text representation of the memory usage of this structure
234239
*/
235240
public String memUsageString(String prefix) {
236-
return String.format("%s %s %d slabs, %,d bytes", prefix, getClass().getSimpleName(), slabs.size(), getCapacity());
241+
return format("%s %s %d slabs, %,d bytes", prefix, getClass().getSimpleName(), slabs.size(), getCapacity());
237242
}
238243

239244
/**

0 commit comments

Comments
 (0)