Skip to content

Commit 12e5bcd

Browse files
authored
add method to get column indices created in segment folder (#7297)
This method expose the column indices created in local segment folder, which can be compared with indices in table config to decide which indices to remove
1 parent 43ffa03 commit 12e5bcd

File tree

7 files changed

+134
-28
lines changed

7 files changed

+134
-28
lines changed

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectory.java

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
import java.io.IOException;
2525
import java.nio.ByteOrder;
2626
import java.util.HashMap;
27+
import java.util.HashSet;
2728
import java.util.Map;
29+
import java.util.Set;
2830
import org.apache.pinot.segment.spi.ColumnMetadata;
2931
import org.apache.pinot.segment.spi.V1Constants;
3032
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
@@ -38,7 +40,7 @@ class FilePerIndexDirectory extends ColumnIndexDirectory {
3840
private final File _segmentDirectory;
3941
private SegmentMetadataImpl _segmentMetadata;
4042
private final ReadMode _readMode;
41-
private final Map<IndexKey, PinotDataBuffer> indexBuffers = new HashMap<>();
43+
private final Map<IndexKey, PinotDataBuffer> _indexBuffers = new HashMap<>();
4244

4345
/**
4446
* @param segmentDirectory File pointing to segment directory
@@ -88,26 +90,39 @@ public boolean hasIndexFor(String column, ColumnIndexType type) {
8890
@Override
8991
public void close()
9092
throws IOException {
91-
for (PinotDataBuffer dataBuffer : indexBuffers.values()) {
93+
for (PinotDataBuffer dataBuffer : _indexBuffers.values()) {
9294
dataBuffer.close();
9395
}
9496
}
9597

9698
@Override
9799
public void removeIndex(String columnName, ColumnIndexType indexType) {
98100
File indexFile = getFileFor(columnName, indexType);
99-
indexFile.delete();
101+
if (indexFile.delete()) {
102+
_indexBuffers.remove(new IndexKey(columnName, indexType));
103+
}
100104
}
101105

102106
@Override
103107
public boolean isIndexRemovalSupported() {
104108
return true;
105109
}
106110

111+
@Override
112+
public Set<String> getColumnsWithIndex(ColumnIndexType type) {
113+
Set<String> columns = new HashSet<>();
114+
for (IndexKey indexKey : _indexBuffers.keySet()) {
115+
if (indexKey.type == type) {
116+
columns.add(indexKey.name);
117+
}
118+
}
119+
return columns;
120+
}
121+
107122
private PinotDataBuffer getReadBufferFor(IndexKey key)
108123
throws IOException {
109-
if (indexBuffers.containsKey(key)) {
110-
return indexBuffers.get(key);
124+
if (_indexBuffers.containsKey(key)) {
125+
return _indexBuffers.get(key);
111126
}
112127

113128
File file = getFileFor(key.name, key.type);
@@ -117,19 +132,19 @@ private PinotDataBuffer getReadBufferFor(IndexKey key)
117132
.toString());
118133
}
119134
PinotDataBuffer buffer = mapForReads(file, key.type.toString() + ".reader");
120-
indexBuffers.put(key, buffer);
135+
_indexBuffers.put(key, buffer);
121136
return buffer;
122137
}
123138

124139
private PinotDataBuffer getWriteBufferFor(IndexKey key, long sizeBytes)
125140
throws IOException {
126-
if (indexBuffers.containsKey(key)) {
127-
return indexBuffers.get(key);
141+
if (_indexBuffers.containsKey(key)) {
142+
return _indexBuffers.get(key);
128143
}
129144

130145
File filename = getFileFor(key.name, key.type);
131146
PinotDataBuffer buffer = mapForWrites(filename, sizeBytes, key.type.toString() + ".writer");
132-
indexBuffers.put(key, buffer);
147+
_indexBuffers.put(key, buffer);
133148
return buffer;
134149
}
135150

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SegmentLocalFSDirectory.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import java.io.IOException;
2525
import java.net.URI;
2626
import java.nio.file.Path;
27+
import java.util.Collections;
28+
import java.util.Set;
2729
import java.util.concurrent.atomic.AtomicLong;
2830
import org.apache.commons.configuration.ConfigurationException;
2931
import org.apache.commons.io.FileUtils;
@@ -167,6 +169,14 @@ public long getDiskSizeBytes() {
167169
}
168170
}
169171

172+
@Override
173+
public Set<String> getColumnsWithIndex(ColumnIndexType type) {
174+
if (_columnIndexDirectory == null) {
175+
return Collections.emptySet();
176+
}
177+
return _columnIndexDirectory.getColumnsWithIndex(type);
178+
}
179+
170180
public Reader createReader()
171181
throws IOException {
172182

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@
2828
import java.nio.ByteOrder;
2929
import java.util.ArrayList;
3030
import java.util.HashMap;
31+
import java.util.HashSet;
3132
import java.util.List;
3233
import java.util.Map;
34+
import java.util.Set;
3335
import java.util.SortedMap;
3436
import java.util.TreeMap;
3537
import org.apache.commons.configuration.ConfigurationException;
@@ -232,8 +234,8 @@ private void loadMap()
232234
String propertyName = key.substring(lastSeparatorPos + 1);
233235

234236
int indexSeparatorPos = key.lastIndexOf(MAP_KEY_SEPARATOR, lastSeparatorPos - 1);
235-
Preconditions
236-
.checkState(indexSeparatorPos != -1, "Index separator not found: " + key + " , segment: " + _segmentDirectory);
237+
Preconditions.checkState(indexSeparatorPos != -1,
238+
"Index separator not found: " + key + " , segment: " + _segmentDirectory);
237239
String indexName = key.substring(indexSeparatorPos + 1, lastSeparatorPos);
238240
String columnName = key.substring(0, indexSeparatorPos);
239241
IndexKey indexKey = new IndexKey(columnName, ColumnIndexType.getValue(indexName));
@@ -370,6 +372,17 @@ public boolean isIndexRemovalSupported() {
370372
return false;
371373
}
372374

375+
@Override
376+
public Set<String> getColumnsWithIndex(ColumnIndexType type) {
377+
Set<String> columns = new HashSet<>();
378+
for (IndexKey indexKey : _columnEntries.keySet()) {
379+
if (indexKey.type == type) {
380+
columns.add(indexKey.name);
381+
}
382+
}
383+
return columns;
384+
}
385+
373386
@Override
374387
public String toString() {
375388
return _segmentDirectory.toString() + "/" + _indexFile.toString();

pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020

2121
import java.io.File;
2222
import java.io.IOException;
23+
import java.util.Arrays;
24+
import java.util.Collections;
25+
import java.util.HashSet;
2326
import org.apache.commons.io.FileUtils;
2427
import org.apache.pinot.segment.spi.creator.SegmentVersion;
2528
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
@@ -28,11 +31,14 @@
2831
import org.apache.pinot.segment.spi.store.ColumnIndexType;
2932
import org.apache.pinot.spi.utils.ReadMode;
3033
import org.apache.pinot.util.TestUtils;
31-
import org.testng.Assert;
3234
import org.testng.annotations.AfterMethod;
3335
import org.testng.annotations.BeforeMethod;
3436
import org.testng.annotations.Test;
3537

38+
import static org.testng.Assert.assertEquals;
39+
import static org.testng.Assert.assertFalse;
40+
import static org.testng.Assert.assertTrue;
41+
3642

3743
public class FilePerIndexDirectoryTest {
3844
private static final File TEMP_DIR =
@@ -60,24 +66,24 @@ public void tearDown()
6066
@Test
6167
public void testEmptyDirectory()
6268
throws Exception {
63-
Assert.assertEquals(0, TEMP_DIR.list().length, TEMP_DIR.list().toString());
69+
assertEquals(0, TEMP_DIR.list().length, TEMP_DIR.list().toString());
6470
try (FilePerIndexDirectory fpiDir = new FilePerIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.heap);
6571
PinotDataBuffer buffer = fpiDir.newBuffer("col1", ColumnIndexType.DICTIONARY, 1024)) {
66-
Assert.assertEquals(1, TEMP_DIR.list().length, TEMP_DIR.list().toString());
72+
assertEquals(1, TEMP_DIR.list().length, TEMP_DIR.list().toString());
6773

6874
buffer.putLong(0, 0xbadfadL);
6975
buffer.putInt(8, 51);
7076
// something at random location
7177
buffer.putInt(101, 55);
7278
}
7379

74-
Assert.assertEquals(1, TEMP_DIR.list().length);
80+
assertEquals(1, TEMP_DIR.list().length);
7581

7682
try (FilePerIndexDirectory colDir = new FilePerIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap);
7783
PinotDataBuffer readBuffer = colDir.getBuffer("col1", ColumnIndexType.DICTIONARY)) {
78-
Assert.assertEquals(readBuffer.getLong(0), 0xbadfadL);
79-
Assert.assertEquals(readBuffer.getInt(8), 51);
80-
Assert.assertEquals(readBuffer.getInt(101), 55);
84+
assertEquals(readBuffer.getLong(0), 0xbadfadL);
85+
assertEquals(readBuffer.getInt(8), 51);
86+
assertEquals(readBuffer.getInt(101), 55);
8187
}
8288
}
8389

@@ -139,7 +145,7 @@ public void testHasIndex()
139145
try (FilePerIndexDirectory fpiDirectory = new FilePerIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap)) {
140146
PinotDataBuffer buffer = fpiDirectory.newBuffer("foo", ColumnIndexType.DICTIONARY, 1024);
141147
buffer.putInt(0, 100);
142-
Assert.assertTrue(fpiDirectory.hasIndexFor("foo", ColumnIndexType.DICTIONARY));
148+
assertTrue(fpiDirectory.hasIndexFor("foo", ColumnIndexType.DICTIONARY));
143149
}
144150
}
145151

@@ -149,11 +155,37 @@ public void testRemoveIndex()
149155
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap)) {
150156
fpi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
151157
fpi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
152-
Assert.assertTrue(fpi.getFileFor("col1", ColumnIndexType.FORWARD_INDEX).exists());
153-
Assert.assertTrue(fpi.getFileFor("col2", ColumnIndexType.DICTIONARY).exists());
154-
Assert.assertTrue(fpi.isIndexRemovalSupported());
158+
assertTrue(fpi.getFileFor("col1", ColumnIndexType.FORWARD_INDEX).exists());
159+
assertTrue(fpi.getFileFor("col2", ColumnIndexType.DICTIONARY).exists());
160+
assertTrue(fpi.isIndexRemovalSupported());
161+
fpi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
162+
assertFalse(fpi.getFileFor("col1", ColumnIndexType.FORWARD_INDEX).exists());
163+
}
164+
}
165+
166+
@Test
167+
public void testGetColumnIndices()
168+
throws IOException {
169+
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap)) {
170+
fpi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
171+
fpi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
172+
fpi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
173+
fpi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 100);
174+
175+
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
176+
new HashSet<>(Arrays.asList("col1", "col3")));
177+
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
178+
new HashSet<>(Collections.singletonList("col2")));
179+
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
180+
new HashSet<>(Collections.singletonList("col4")));
181+
155182
fpi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
156-
Assert.assertFalse(fpi.getFileFor("col1", ColumnIndexType.FORWARD_INDEX).exists());
183+
fpi.removeIndex("col2", ColumnIndexType.DICTIONARY);
184+
fpi.removeIndex("col111", ColumnIndexType.DICTIONARY);
185+
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
186+
new HashSet<>(Collections.singletonList("col3")));
187+
assertEquals(fpi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
188+
new HashSet<>(Collections.singletonList("col4")));
157189
}
158190
}
159191
}

pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.io.File;
2222
import java.io.IOException;
2323
import java.util.Arrays;
24+
import java.util.Collections;
2425
import java.util.HashSet;
2526
import org.apache.commons.configuration.ConfigurationException;
2627
import org.apache.commons.io.FileUtils;
@@ -32,11 +33,13 @@
3233
import org.apache.pinot.spi.utils.ReadMode;
3334
import org.apache.pinot.util.TestUtils;
3435
import org.mockito.Mockito;
35-
import org.testng.Assert;
3636
import org.testng.annotations.AfterMethod;
3737
import org.testng.annotations.BeforeMethod;
3838
import org.testng.annotations.Test;
3939

40+
import static org.testng.Assert.assertEquals;
41+
import static org.testng.Assert.assertFalse;
42+
4043

4144
public class SingleFileIndexDirectoryTest {
4245
private static final File TEMP_DIR =
@@ -75,7 +78,7 @@ void writeMetadata() {
7578
public void testWithEmptyDir()
7679
throws Exception {
7780
// segmentDir does not have anything to begin with
78-
Assert.assertEquals(TEMP_DIR.list().length, 0);
81+
assertEquals(TEMP_DIR.list().length, 0);
7982
SingleFileIndexDirectory columnDirectory = new SingleFileIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap);
8083
PinotDataBuffer writtenBuffer = columnDirectory.newBuffer("foo", ColumnIndexType.DICTIONARY, 1024);
8184
String data = "This is a test string";
@@ -89,11 +92,11 @@ public void testWithEmptyDir()
8992
Mockito.when(segmentMetadata.getAllColumns()).thenReturn(new HashSet<String>(Arrays.asList("foo")));
9093
try (SingleFileIndexDirectory directoryReader = new SingleFileIndexDirectory(TEMP_DIR, segmentMetadata,
9194
ReadMode.mmap); PinotDataBuffer readBuffer = directoryReader.getBuffer("foo", ColumnIndexType.DICTIONARY)) {
92-
Assert.assertEquals(1024, readBuffer.size());
95+
assertEquals(1024, readBuffer.size());
9396
int length = dataBytes.length;
9497
for (int i = 0; i < length; i++) {
9598
byte b = readBuffer.getByte(i);
96-
Assert.assertEquals(dataBytes[i], b);
99+
assertEquals(dataBytes[i], b);
97100
}
98101
}
99102
}
@@ -161,8 +164,28 @@ public void testRemoveIndex()
161164
throws IOException, ConfigurationException {
162165
try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap)) {
163166
sfd.newBuffer("col1", ColumnIndexType.DICTIONARY, 1024);
164-
Assert.assertFalse(sfd.isIndexRemovalSupported());
167+
assertFalse(sfd.isIndexRemovalSupported());
165168
sfd.removeIndex("col1", ColumnIndexType.DICTIONARY);
166169
}
167170
}
171+
172+
@Test
173+
public void testGetColumnIndices()
174+
throws Exception {
175+
try (SingleFileIndexDirectory spi = new SingleFileIndexDirectory(TEMP_DIR, segmentMetadata, ReadMode.mmap)) {
176+
spi.newBuffer("col1", ColumnIndexType.FORWARD_INDEX, 1024);
177+
spi.newBuffer("col2", ColumnIndexType.DICTIONARY, 100);
178+
spi.newBuffer("col3", ColumnIndexType.FORWARD_INDEX, 1024);
179+
spi.newBuffer("col4", ColumnIndexType.INVERTED_INDEX, 100);
180+
181+
assertEquals(spi.getColumnsWithIndex(ColumnIndexType.FORWARD_INDEX),
182+
new HashSet<>(Arrays.asList("col1", "col3")));
183+
assertEquals(spi.getColumnsWithIndex(ColumnIndexType.DICTIONARY),
184+
new HashSet<>(Collections.singletonList("col2")));
185+
assertEquals(spi.getColumnsWithIndex(ColumnIndexType.INVERTED_INDEX),
186+
new HashSet<>(Collections.singletonList("col4")));
187+
// TODO: implement removeIndex and test it in next RP
188+
// spi.removeIndex("col1", ColumnIndexType.FORWARD_INDEX);
189+
}
190+
}
168191
}

pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/ColumnIndexDirectory.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.io.Closeable;
2222
import java.io.File;
2323
import java.io.IOException;
24+
import java.util.Set;
2425
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
2526
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
2627

@@ -85,6 +86,12 @@ public abstract PinotDataBuffer newBuffer(String column, ColumnIndexType type, l
8586
*/
8687
public abstract boolean isIndexRemovalSupported();
8788

89+
/**
90+
* Get the columns with specific index type, loaded by column index directory.
91+
* @return a set of columns with such index type.
92+
*/
93+
public abstract Set<String> getColumnsWithIndex(ColumnIndexType type);
94+
8895
/**
8996
* Fetch the buffer for this column
9097
*/

pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectory.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ public abstract void reloadMetadata()
108108

109109
public abstract long getDiskSizeBytes();
110110

111+
/**
112+
* Get the columns with specific index type, in this local segment directory.
113+
* @return a set of columns with such index type.
114+
*/
115+
public abstract Set<String> getColumnsWithIndex(ColumnIndexType type);
116+
111117
/**
112118
* This is a hint to the the implementation, to prefetch buffers for specified columns
113119
* @param columns columns to prefetch

0 commit comments

Comments
 (0)