2021-11-09 22:59:27 +01:00
package eu.openaire.urls_controller.configuration ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
2022-01-30 21:14:52 +01:00
import org.springframework.beans.factory.annotation.Autowired ;
import org.springframework.beans.factory.annotation.Value ;
import org.springframework.jdbc.core.JdbcTemplate ;
import org.springframework.stereotype.Repository ;
2021-12-21 14:55:27 +01:00
2022-01-30 21:14:52 +01:00
import javax.annotation.PostConstruct ;
2021-11-09 22:59:27 +01:00
import java.util.concurrent.locks.Lock ;
import java.util.concurrent.locks.ReentrantLock ;
2022-01-30 21:14:52 +01:00
@Repository
2021-11-09 22:59:27 +01:00
public final class ImpalaConnector {
private static final Logger logger = LoggerFactory . getLogger ( ImpalaConnector . class ) ;
2022-01-30 21:14:52 +01:00
@Autowired
private JdbcTemplate jdbcTemplate ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
@Value ( " services.pdfaggregation.controller.db.oldDatabaseName:pdfaggregation_i " )
2022-01-28 06:24:42 +01:00
public static String oldDatabaseName ;
2022-01-30 21:14:52 +01:00
@Value ( " services.pdfaggregation.controller.db.databaseName:pdfAggregationDatabase " )
2022-01-28 06:24:42 +01:00
public static String databaseName ;
2021-11-09 22:59:27 +01:00
public static final Lock databaseLock = new ReentrantLock ( true ) ; // This lock is locking the threads trying to execute queries in the database.
2022-01-30 21:14:52 +01:00
@PostConstruct
public void init ( ) {
2021-11-09 22:59:27 +01:00
logger . info ( " Max available memory to the Controller: " + Runtime . getRuntime ( ) . maxMemory ( ) + " bytes. " ) ;
try {
2022-01-30 21:14:52 +01:00
if ( jdbcTemplate . getDataSource ( ) . getConnection ( ) . getMetaData ( ) . supportsBatchUpdates ( ) )
logger . warn ( " The database does not support \" BatchUpdates \" ! " ) ;
} catch ( Exception e ) {
logger . error ( " Error testing if database supports batch updates " , e ) ;
2021-11-09 22:59:27 +01:00
}
createDatabase ( ) ;
}
2022-01-30 21:14:52 +01:00
private void createDatabase ( ) {
2021-11-09 22:59:27 +01:00
logger . info ( " Going to create the database and the tables, if they do not exist. Also will fill some tables with data from OpenAIRE. " ) ;
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE DATABASE IF NOT EXISTS " + databaseName ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .publication stored as parquet as select * from " + oldDatabaseName + " .publication " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .publication " ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .publication_pids stored as parquet as select * from " + oldDatabaseName + " .publication_pids " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .publication_pids " ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .publication_urls stored as parquet as select * from " + oldDatabaseName + " .publication_urls " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .publication_urls " ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .datasource stored as parquet as select * from " + oldDatabaseName + " .datasource " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .datasource " ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .assignment (id string, original_url string, workerid string, `date` timestamp) stored as parquet " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .assignment " ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " DROP TABLE IF EXISTS " + ImpalaConnector . databaseName + " .current_assignment PURGE " ) ;
2022-01-19 00:37:47 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .attempt (id string, original_url string, `date` timestamp, status string, error_class string, error_message string) stored as parquet " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .attempt " ) ;
2021-11-09 22:59:27 +01:00
2022-01-30 21:14:52 +01:00
jdbcTemplate . execute ( " CREATE TABLE IF NOT EXISTS " + databaseName + " .payload (id string, original_url string, actual_url string, `date` timestamp, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet " ) ;
jdbcTemplate . execute ( " COMPUTE STATS " + databaseName + " .payload " ) ;
2021-11-09 22:59:27 +01:00
logger . info ( " The database \" " + databaseName + " \" and its tables were created or validated. " ) ;
}
2022-01-31 03:17:16 +01:00
public static String handlePreparedStatementException ( String queryName , String query , Exception e ) {
2021-11-30 12:26:19 +01:00
String errorMsg = " Problem when creating " + ( ( ! queryName . startsWith ( " get " ) ) ? " and executing " : " " ) + " the prepared statement for \" " + queryName + " \" ! \ n " ;
2022-01-30 21:14:52 +01:00
logger . error ( errorMsg + " \ n \ n " + query + " \ n \ n " , e ) ;
2021-11-30 12:26:19 +01:00
return errorMsg ;
}
2021-11-09 22:59:27 +01:00
}