forked from lsmyrnaios/UrlsController
Optimize the test-DB creation process:
- Use views over the tables and views of the "initialDatabase" to a) reduce the amount of space used by test-DBs and b) improve test-DB creation performance.
- Avoid possible failures from outdated metadata.
This commit is contained in:
parent
f61cae41a1
commit
b34417dc45
|
@ -58,23 +58,29 @@ public class DatabaseConnector {
|
|||
logger.info("Going to create (if not exist) the TEST-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from the initial-database \"" + initialDatabaseName + "\".");
|
||||
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication");
|
||||
try { // Metastore takes some time to recognize the DB has been created, in order to use it later..
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignore) {}
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_pids stored as parquet as select * from " + initialDatabaseName + ".publication_pids");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_pids");
|
||||
jdbcTemplate.update("INVALIDATE METADATA");
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_urls stored as parquet as select * from " + initialDatabaseName + ".publication_urls");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_urls");
|
||||
try { // Metastore takes some time to recognize the DB has been created, in order to use it later..
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignore) {}
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_boost stored as parquet as select * from " + initialDatabaseName + ".publication_boost");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_boost");
|
||||
// Create VIEWs of the original data. We just READ from it, so it's safe for our testing environment..
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".datasource stored as parquet as select * from " + initialDatabaseName + ".datasource");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".datasource");
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication as select * from " + initialDatabaseName + ".publication");
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".payload_legacy stored as parquet as select * from " + initialDatabaseName + ".payload_legacy");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".payload_legacy");
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_pids as select * from " + initialDatabaseName + ".publication_pids");
|
||||
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_urls as select * from " + initialDatabaseName + ".publication_urls");
|
||||
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_boost as select * from " + initialDatabaseName + ".publication_boost");
|
||||
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".datasource as select * from " + initialDatabaseName + ".datasource");
|
||||
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".payload_legacy as select * from " + initialDatabaseName + ".payload_legacy");
|
||||
|
||||
databaseName = testDatabaseName;
|
||||
} else {
|
||||
|
@ -107,9 +113,21 @@ public class DatabaseConnector {
|
|||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".payload_bulk_import (id string, original_url string, actual_url string, `date` bigint, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet");
|
||||
jdbcTemplate.execute("COMPUTE STATS " + databaseName + ".payload_bulk_import");
|
||||
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + databaseName + ".payload " +
|
||||
"AS SELECT * from " + databaseName + ".payload_legacy " +
|
||||
"UNION ALL SELECT * FROM " + databaseName +".payload_aggregated " +
|
||||
|
||||
try { // Metastore takes some time to recognize the tables have been created, in order to use them in the view.
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignore) {}
|
||||
|
||||
jdbcTemplate.update("INVALIDATE METADATA");
|
||||
|
||||
try { // Metastore takes some time to recognize the tables have been created, in order to use them in the view.
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignore) {}
|
||||
|
||||
|
||||
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + databaseName + ".payload\n" +
|
||||
"AS SELECT * from " + databaseName + ".payload_legacy\n" +
|
||||
"UNION ALL SELECT * FROM " + databaseName +".payload_aggregated\n" +
|
||||
"UNION ALL SELECT * FROM " + databaseName + ".payload_bulk_import");
|
||||
// We do not do the "compute stats" for the view, since we get the following error: "COMPUTE STATS not supported for view: pdfaggregationdatabase_payloads_view.payload".
|
||||
|
||||
|
|
Loading…
Reference in New Issue