Skip to content

Commit 7cd53e4

Browse files
authored
Allow quickstart to get table files from filesystem. (#8093)
* Allow quickstart to get table files from filesystem. * Move bootstrapTableDir accessor methods to QuickStartBase.java. * Rebuild. * Cleanup.
1 parent dd73ee7 commit 7cd53e4

File tree

3 files changed

+119
-26
lines changed

3 files changed

+119
-26
lines changed

pinot-tools/src/main/java/org/apache/pinot/tools/QuickStartBase.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import com.google.common.collect.ImmutableMap;
2222
import java.io.File;
23+
import java.nio.file.Paths;
2324
import java.util.List;
2425
import java.util.Map;
2526
import org.apache.commons.configuration.ConfigurationException;
@@ -31,6 +32,7 @@
3132

3233
public abstract class QuickStartBase {
3334
protected File _dataDir = FileUtils.getTempDirectory();
35+
protected String _bootstrapDataDir;
3436
protected String _zkExternalAddress;
3537
protected String _configFilePath;
3638

@@ -39,6 +41,37 @@ public QuickStartBase setDataDir(String dataDir) {
3941
return this;
4042
}
4143

44+
public QuickStartBase setBootstrapDataDir(String bootstrapDataDir) {
45+
_bootstrapDataDir = bootstrapDataDir;
46+
return this;
47+
}
48+
49+
/**
50+
* Assuming that database name is DBNAME, bootstrap path must have the file structure specified below to properly
51+
* load the table:
52+
* DBNAME
53+
* ├── ingestionJobSpec.yaml
54+
* ├── rawdata
55+
* │ └── DBNAME_data.csv
56+
* ├── DBNAME_offline_table_config.json
57+
* └── DBNAME_schema.json
58+
*
59+
* @return bootstrap path if specified by command line argument -bootstrapTableDir; otherwise, default.
60+
*/
61+
public String getBootstrapDataDir(String bootstrapDataDir) {
62+
return _bootstrapDataDir != null ? _bootstrapDataDir : bootstrapDataDir;
63+
}
64+
65+
/** @return Table name if specified by command line argument -bootstrapTableDir; otherwise, default. */
66+
public String getTableName(String bootstrapDataDir) {
67+
return Paths.get(getBootstrapDataDir(bootstrapDataDir)).getFileName().toString();
68+
}
69+
70+
/** @return true if bootstrapTableDir is not specified by command line argument -bootstrapTableDir, else false.*/
71+
public boolean useDefaultBootstrapTableDir() {
72+
return _bootstrapDataDir == null;
73+
}
74+
4275
public QuickStartBase setZkExternalAddress(String zkExternalAddress) {
4376
_zkExternalAddress = zkExternalAddress;
4477
return this;

pinot-tools/src/main/java/org/apache/pinot/tools/Quickstart.java

Lines changed: 70 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.google.common.base.Preconditions;
2323
import com.google.common.collect.Lists;
2424
import java.io.File;
25+
import java.io.IOException;
2526
import java.net.URL;
2627
import java.util.ArrayList;
2728
import java.util.Arrays;
@@ -39,6 +40,7 @@ public List<String> types() {
3940

4041
private static final String TAB = "\t\t";
4142
private static final String NEW_LINE = "\n";
43+
private static final String DEFAULT_BOOTSTRAP_DIRECTORY = "examples/batch/baseballStats";
4244

4345
public enum Color {
4446
RESET("\u001B[0m"), GREEN("\u001B[32m"), YELLOW("\u001B[33m"), CYAN("\u001B[36m");
@@ -54,10 +56,6 @@ public String getCode() {
5456
}
5557
}
5658

57-
public String getBootstrapDataDir() {
58-
return "examples/batch/baseballStats";
59-
}
60-
6159
public int getNumMinions() {
6260
return 0;
6361
}
@@ -91,30 +89,17 @@ public static String prettyPrintResponse(JsonNode response) {
9189

9290
public void execute()
9391
throws Exception {
92+
String tableName = getTableName(DEFAULT_BOOTSTRAP_DIRECTORY);
9493
File quickstartTmpDir = new File(_dataDir, String.valueOf(System.currentTimeMillis()));
95-
File baseDir = new File(quickstartTmpDir, "baseballStats");
94+
File baseDir = new File(quickstartTmpDir, tableName);
9695
File dataDir = new File(baseDir, "rawdata");
9796
Preconditions.checkState(dataDir.mkdirs());
9897

99-
File schemaFile = new File(baseDir, "baseballStats_schema.json");
100-
File tableConfigFile = new File(baseDir, "baseballStats_offline_table_config.json");
101-
File ingestionJobSpecFile = new File(baseDir, "ingestionJobSpec.yaml");
102-
File dataFile = new File(dataDir, "baseballStats_data.csv");
103-
104-
ClassLoader classLoader = Quickstart.class.getClassLoader();
105-
URL resource = classLoader.getResource(getBootstrapDataDir() + "/baseballStats_schema.json");
106-
com.google.common.base.Preconditions.checkNotNull(resource);
107-
FileUtils.copyURLToFile(resource, schemaFile);
108-
resource = classLoader.getResource(getBootstrapDataDir() + "/rawdata/baseballStats_data.csv");
109-
com.google.common.base.Preconditions.checkNotNull(resource);
110-
FileUtils.copyURLToFile(resource, dataFile);
111-
resource = classLoader.getResource(getBootstrapDataDir() + "/ingestionJobSpec.yaml");
112-
if (resource != null) {
113-
FileUtils.copyURLToFile(resource, ingestionJobSpecFile);
98+
if (useDefaultBootstrapTableDir()) {
99+
copyResourceTableToTmpDirectory(getBootstrapDataDir(DEFAULT_BOOTSTRAP_DIRECTORY), tableName, baseDir, dataDir);
100+
} else {
101+
copyFilesystemTableToTmpDirectory(getBootstrapDataDir(DEFAULT_BOOTSTRAP_DIRECTORY), tableName, baseDir);
114102
}
115-
resource = classLoader.getResource(getBootstrapDataDir() + "/baseballStats_offline_table_config.json");
116-
com.google.common.base.Preconditions.checkNotNull(resource);
117-
FileUtils.copyURLToFile(resource, tableConfigFile);
118103

119104
QuickstartTableRequest request = new QuickstartTableRequest(baseDir.getAbsolutePath());
120105
QuickstartRunner runner =
@@ -133,13 +118,74 @@ public void execute()
133118
e.printStackTrace();
134119
}
135120
}));
136-
printStatus(Color.CYAN, "***** Bootstrap baseballStats table *****");
121+
printStatus(Color.CYAN, "***** Bootstrap " + tableName + " table *****");
137122
runner.bootstrapTable();
138123

139124
waitForBootstrapToComplete(runner);
140125

141126
printStatus(Color.YELLOW, "***** Offline quickstart setup complete *****");
142127

128+
if (useDefaultBootstrapTableDir()) {
129+
// Quickstart is using the default baseballStats sample table, so run sample queries.
130+
runSampleQueries(runner);
131+
}
132+
133+
printStatus(Color.GREEN, "You can always go to http://localhost:9000 to play around in the query console");
134+
}
135+
136+
private static void copyResourceTableToTmpDirectory(String sourcePath, String tableName, File baseDir, File dataDir)
137+
throws IOException {
138+
139+
File schemaFile = new File(baseDir, tableName + "_schema.json");
140+
File tableConfigFile = new File(baseDir, tableName + "_offline_table_config.json");
141+
File ingestionJobSpecFile = new File(baseDir, "ingestionJobSpec.yaml");
142+
File dataFile = new File(dataDir, tableName + "_data.csv");
143+
144+
ClassLoader classLoader = Quickstart.class.getClassLoader();
145+
URL resource = classLoader.getResource(sourcePath + File.separator + tableName + "_schema.json");
146+
com.google.common.base.Preconditions.checkNotNull(resource);
147+
FileUtils.copyURLToFile(resource, schemaFile);
148+
resource =
149+
classLoader.getResource(sourcePath + File.separator + "rawdata" + File.separator + tableName + "_data.csv");
150+
com.google.common.base.Preconditions.checkNotNull(resource);
151+
FileUtils.copyURLToFile(resource, dataFile);
152+
resource = classLoader.getResource(sourcePath + File.separator + "ingestionJobSpec.yaml");
153+
if (resource != null) {
154+
FileUtils.copyURLToFile(resource, ingestionJobSpecFile);
155+
}
156+
resource = classLoader.getResource(sourcePath + File.separator + tableName + "_offline_table_config.json");
157+
com.google.common.base.Preconditions.checkNotNull(resource);
158+
FileUtils.copyURLToFile(resource, tableConfigFile);
159+
}
160+
161+
private static void copyFilesystemTableToTmpDirectory(String sourcePath, String tableName, File baseDir)
162+
throws IOException {
163+
File fileDb = new File(sourcePath);
164+
165+
if (!fileDb.exists() || !fileDb.isDirectory()) {
166+
throw new RuntimeException("Directory " + fileDb.getAbsolutePath() + " not found.");
167+
}
168+
169+
File schemaFile = new File(fileDb, tableName + "_schema.json");
170+
if (!schemaFile.exists()) {
171+
throw new RuntimeException("Schema file " + schemaFile.getAbsolutePath() + " not found.");
172+
}
173+
174+
File tableFile = new File(fileDb, tableName + "_offline_table_config.json");
175+
if (!tableFile.exists()) {
176+
throw new RuntimeException("Table table " + tableFile.getAbsolutePath() + " not found.");
177+
}
178+
179+
File data = new File(fileDb, "rawdata" + File.separator + tableName + "_data.csv");
180+
if (!data.exists()) {
181+
throw new RuntimeException(("Data file " + data.getAbsolutePath() + " not found. "));
182+
}
183+
184+
FileUtils.copyDirectory(fileDb, baseDir);
185+
}
186+
187+
private static void runSampleQueries(QuickstartRunner runner)
188+
throws Exception {
143189
String q1 = "select count(*) from baseballStats limit 1";
144190
printStatus(Color.YELLOW, "Total number of documents in the table");
145191
printStatus(Color.CYAN, "Query : " + q1);
@@ -173,8 +219,6 @@ public void execute()
173219
printStatus(Color.CYAN, "Query : " + q5);
174220
printStatus(Color.YELLOW, prettyPrintResponse(runner.runQuery(q5)));
175221
printStatus(Color.GREEN, "***************************************************");
176-
177-
printStatus(Color.GREEN, "You can always go to http://localhost:9000 to play around in the query console");
178222
}
179223

180224
public static void main(String[] args)

pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/QuickStartCommand.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ public class QuickStartCommand extends AbstractBaseAdminCommand implements Comma
3939
description = "Type of quickstart, supported: STREAM/BATCH/HYBRID")
4040
private String _type;
4141

42+
@CommandLine.Option(names = {"-bootstrapTableDir"}, required = false,
43+
description = "Directory containing table schema, config, and data.")
44+
private String _bootstrapTableDir;
45+
4246
@CommandLine.Option(names = {"-tmpDir", "-quickstartDir", "-dataDir"}, required = false,
4347
description = "Temp Directory to host quickstart data")
4448
private String _tmpDir;
@@ -78,6 +82,14 @@ public void setTmpDir(String tmpDir) {
7882
_tmpDir = tmpDir;
7983
}
8084

85+
public String getBootstrapDataDir() {
86+
return _bootstrapTableDir;
87+
}
88+
89+
public void setBootstrapTableDir(String bootstrapTableDir) {
90+
_bootstrapTableDir = bootstrapTableDir;
91+
}
92+
8193
public String getZkExternalAddress() {
8294
return _zkExternalAddress;
8395
}
@@ -130,6 +142,10 @@ public boolean execute() throws Exception {
130142
quickstart.setDataDir(_tmpDir);
131143
}
132144

145+
if (_bootstrapTableDir != null) {
146+
quickstart.setBootstrapDataDir(_bootstrapTableDir);
147+
}
148+
133149
if (_zkExternalAddress != null) {
134150
quickstart.setZkExternalAddress(_zkExternalAddress);
135151
}

0 commit comments

Comments
 (0)