logger.info("Going to create (if not exist) the TEST-database \""+testDatabaseName+"\" and its tables. Also will fill some tables with data from the initial-database \""+initialDatabaseName+"\".");
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS "+testDatabaseName);
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+testDatabaseName+".publication stored as parquet as select * from "+initialDatabaseName+".publication");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+testDatabaseName+".publication_pids stored as parquet as select * from "+initialDatabaseName+".publication_pids");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+testDatabaseName+".publication_urls stored as parquet as select * from "+initialDatabaseName+".publication_urls");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+testDatabaseName+".datasource stored as parquet as select * from "+initialDatabaseName+".datasource");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+testDatabaseName+".payload_legacy stored as parquet as select * from "+initialDatabaseName+".payload_legacy");
// Note that for the "initialDatabase", the initial 5 tables are expected to be created either manually or by other pieces of software, as views of the contents of the Graph.
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+databaseName+".assignment (id string, original_url string, workerid string, `date` bigint) stored as parquet");
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+databaseName+".attempt (id string, original_url string, `date` bigint, status string, error_class string, error_message string) stored as parquet");
// Create a VIEW "payload" which consists of 3 different tables:
// 1) The "payload_legacy" table, which contains data older than 2022, which is imported by previous full-text aggregation processes.
// 2) The "payload_aggregated" table, which contains data from 2022 onwards, collected by the new PDF-Aggregation-Service.
// 3) The "payload_bulk_import", which contains data collected from the bulk-imported content from datasources like "arXiv".
// So, each aggregation process will "load" its contents to the right table, but in order to get the "total" metrics, we can just query the "payload" view.
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS "+databaseName+".payload_aggregated (id string, original_url string, actual_url string, `date` bigint, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet");
jdbcTemplate.execute("CREATE VIEW IF NOT EXISTS "+databaseName+".payload "+
"AS SELECT * from "+databaseName+".payload_legacy "+
"UNION ALL SELECT * FROM "+databaseName+".payload_aggregated "+
"UNION ALL SELECT * FROM "+databaseName+".payload_bulk_import");
// We do not do the "compute stats" for the view, since we get the following error: "COMPUTE STATS not supported for view: pdfaggregationdatabase_payloads_view.payload".