Optimize the test-DB creation process:
- Use views of the "initialDatabase" view and tables to a) reduce the amount of space used by test-DBs and b) improve test-DB creation performance.
- Avoid possible failures from outdated metadata.
This commit is contained in:
parent
f61cae41a1
commit
b34417dc45
|
@ -58,23 +58,29 @@ public class DatabaseConnector {
|
||||||
logger.info("Going to create (if not exist) the TEST-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from the initial-database \"" + initialDatabaseName + "\".");
|
logger.info("Going to create (if not exist) the TEST-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from the initial-database \"" + initialDatabaseName + "\".");
|
||||||
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
try { // Metastore takes some time to recognize the DB has been created, in order to use it later..
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication");
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException ignore) {}
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_pids stored as parquet as select * from " + initialDatabaseName + ".publication_pids");
|
jdbcTemplate.update("INVALIDATE METADATA");
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_pids");
|
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_urls stored as parquet as select * from " + initialDatabaseName + ".publication_urls");
|
try { // Metastore takes some time to recognize the DB has been created, in order to use it later..
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_urls");
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException ignore) {}
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_boost stored as parquet as select * from " + initialDatabaseName + ".publication_boost");
|
// Create VIEWs of the original data. We just READ from it, so it's safe for our testing environment..
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_boost");
|
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".datasource stored as parquet as select * from " + initialDatabaseName + ".datasource");
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication as select * from " + initialDatabaseName + ".publication");
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".datasource");
|
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".payload_legacy stored as parquet as select * from " + initialDatabaseName + ".payload_legacy");
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_pids as select * from " + initialDatabaseName + ".publication_pids");
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".payload_legacy");
|
|
||||||
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_urls as select * from " + initialDatabaseName + ".publication_urls");
|
||||||
|
|
||||||
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_boost as select * from " + initialDatabaseName + ".publication_boost");
|
||||||
|
|
||||||
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".datasource as select * from " + initialDatabaseName + ".datasource");
|
||||||
|
|
||||||
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".payload_legacy as select * from " + initialDatabaseName + ".payload_legacy");
|
||||||
|
|
||||||
databaseName = testDatabaseName;
|
databaseName = testDatabaseName;
|
||||||
} else {
|
} else {
|
||||||
|
@ -107,9 +113,21 @@ public class DatabaseConnector {
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".payload_bulk_import (id string, original_url string, actual_url string, `date` bigint, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet");
|
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".payload_bulk_import (id string, original_url string, actual_url string, `date` bigint, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet");
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + databaseName + ".payload_bulk_import");
|
jdbcTemplate.execute("COMPUTE STATS " + databaseName + ".payload_bulk_import");
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + databaseName + ".payload " +
|
|
||||||
"AS SELECT * from " + databaseName + ".payload_legacy " +
|
try { // Metastore takes some time to recognize the tables have been created, in order to use them in the view.
|
||||||
"UNION ALL SELECT * FROM " + databaseName +".payload_aggregated " +
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException ignore) {}
|
||||||
|
|
||||||
|
jdbcTemplate.update("INVALIDATE METADATA");
|
||||||
|
|
||||||
|
try { // Metastore takes some time to recognize the tables have been created, in order to use them in the view.
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException ignore) {}
|
||||||
|
|
||||||
|
|
||||||
|
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + databaseName + ".payload\n" +
|
||||||
|
"AS SELECT * from " + databaseName + ".payload_legacy\n" +
|
||||||
|
"UNION ALL SELECT * FROM " + databaseName +".payload_aggregated\n" +
|
||||||
"UNION ALL SELECT * FROM " + databaseName + ".payload_bulk_import");
|
"UNION ALL SELECT * FROM " + databaseName + ".payload_bulk_import");
|
||||||
// We do not do the "compute stats" for the view, since we get the following error: "COMPUTE STATS not supported for view: pdfaggregationdatabase_payloads_view.payload".
|
// We do not do the "compute stats" for the view, since we get the following error: "COMPUTE STATS not supported for view: pdfaggregationdatabase_payloads_view.payload".
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue