From b34417dc4567e8464198edb53d0b7a3246ad8ea9 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Thu, 14 Mar 2024 13:10:54 +0200 Subject: [PATCH] Optimize the test-DB creation process: - Use views of the "initialDatabase" view and tables to a) reduce the amount of space used by test-DBs and b) improve test-db creation performance. - Avoid possible failures from outdated metadata. --- .../configuration/DatabaseConnector.java | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/src/main/java/eu/openaire/urls_controller/configuration/DatabaseConnector.java b/src/main/java/eu/openaire/urls_controller/configuration/DatabaseConnector.java index cc9b9c8..d6531d9 100644 --- a/src/main/java/eu/openaire/urls_controller/configuration/DatabaseConnector.java +++ b/src/main/java/eu/openaire/urls_controller/configuration/DatabaseConnector.java @@ -58,23 +58,29 @@ public class DatabaseConnector { logger.info("Going to create (if not exist) the TEST-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from the initial-database \"" + initialDatabaseName + "\"."); jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName); - jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication"); - jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication"); + try { // Metastore takes some time to recognize the DB has been created, in order to use it later.. + Thread.sleep(1000); + } catch (InterruptedException ignore) {} - jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_pids stored as parquet as select * from " + initialDatabaseName + ".publication_pids"); - jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_pids"); + jdbcTemplate.update("INVALIDATE METADATA"); - jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_urls stored as parquet as select * from " + initialDatabaseName + ".publication_urls"); - jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_urls"); + try { // Metastore takes some time to recognize the DB has been created, in order to use it later.. + Thread.sleep(1000); + } catch (InterruptedException ignore) {} - jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication_boost stored as parquet as select * from " + initialDatabaseName + ".publication_boost"); - jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".publication_boost"); + // Create VIEWs of the original data. We just READ from it, so it's safe for our testing environment.. - jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".datasource stored as parquet as select * from " + initialDatabaseName + ".datasource"); - jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".datasource"); + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication as select * from " + initialDatabaseName + ".publication"); - jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".payload_legacy stored as parquet as select * from " + initialDatabaseName + ".payload_legacy"); - jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".payload_legacy"); + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_pids as select * from " + initialDatabaseName + ".publication_pids"); + + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_urls as select * from " + initialDatabaseName + ".publication_urls"); + + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".publication_boost as select * from " + initialDatabaseName + ".publication_boost"); + + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".datasource as select * from " + initialDatabaseName + ".datasource"); + + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + testDatabaseName + ".payload_legacy as select * from " + initialDatabaseName + ".payload_legacy"); databaseName = testDatabaseName; } else { @@ -107,9 +113,21 @@ public class DatabaseConnector { jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".payload_bulk_import (id string, original_url string, actual_url string, `date` bigint, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet"); jdbcTemplate.execute("COMPUTE STATS " + databaseName + ".payload_bulk_import"); - jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + databaseName + ".payload " + - "AS SELECT * from " + databaseName + ".payload_legacy " + - "UNION ALL SELECT * FROM " + databaseName +".payload_aggregated " + + + try { // Metastore takes some time to recognize the tables have been created, in order to use them in the view. + Thread.sleep(1000); + } catch (InterruptedException ignore) {} + + jdbcTemplate.update("INVALIDATE METADATA"); + + try { // Metastore takes some time to recognize the tables have been created, in order to use them in the view. + Thread.sleep(1000); + } catch (InterruptedException ignore) {} + + + jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS " + databaseName + ".payload\n" + + "AS SELECT * from " + databaseName + ".payload_legacy\n" + + "UNION ALL SELECT * FROM " + databaseName +".payload_aggregated\n" + "UNION ALL SELECT * FROM " + databaseName + ".payload_bulk_import"); // We do not do the "compute stats" for the view, since we get the following error: "COMPUTE STATS not supported for view: pdfaggregationdatabase_payloads_view.payload".