// UrlsController/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java
// (repository listing metadata: 80 lines, 4.2 KiB, Java)

package eu.openaire.urls_controller.configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;
import javax.annotation.PostConstruct;
import java.sql.Connection;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/**
 * Spring bean that prepares the working database when the application starts.
 * <p>
 * After dependency injection, {@link #init()} logs the JVM's maximum memory, checks whether the
 * JDBC driver reports batch-update support and then issues the statements which create the
 * database and its tables if they do not exist yet.
 * <p>
 * The database names are exposed as public static fields; they are assigned through the
 * {@code @Value}-annotated setters below when the bean is created and are {@code null} before that.
 * <p>
 * NOTE(review): the class is intentionally not {@code final}, so that Spring is able to create a
 * subclass-based proxy for it in case a bean post-processor wants to wrap this {@code @Repository}
 * bean. Confirm against the application's configuration.
 */
@Repository
public class ImpalaConnector {

    private static final Logger logger = LoggerFactory.getLogger(ImpalaConnector.class);

    @Autowired
    private JdbcTemplate jdbcTemplate;

    /** Name of the database the initial tables are copied from. Assigned by {@link #setOldDatabaseName(String)}. */
    public static String oldDatabaseName;

    /** Name of the database this class creates and fills. Assigned by {@link #setDatabaseName(String)}. */
    public static String databaseName;

    public static final Lock databaseLock = new ReentrantLock(true); // This (fair) lock is locking the threads trying to execute queries in the database.


    /**
     * Receives the configured name of the source database and stores it in the static field.
     * Spring does not inject {@code @Value} into static fields, hence this instance-level setter.
     *
     * @param oldDatabaseName value of {@code services.pdfaggregation.controller.db.oldDatabaseName},
     *                        or {@code pdfaggregation_i} when the property is not defined
     */
    @Value("${services.pdfaggregation.controller.db.oldDatabaseName:pdfaggregation_i}")
    public void setOldDatabaseName(String oldDatabaseName) {
        ImpalaConnector.oldDatabaseName = oldDatabaseName;
    }


    /**
     * Receives the configured name of the working database and stores it in the static field.
     * Spring does not inject {@code @Value} into static fields, hence this instance-level setter.
     *
     * @param databaseName value of {@code services.pdfaggregation.controller.db.databaseName},
     *                     or {@code pdfAggregationDatabase} when the property is not defined
     */
    @Value("${services.pdfaggregation.controller.db.databaseName:pdfAggregationDatabase}")
    public void setDatabaseName(String databaseName) {
        ImpalaConnector.databaseName = databaseName;
    }


    /**
     * Runs once after injection: logs the available memory, warns when the driver reports that
     * batch updates are NOT supported and then creates the database and its tables.
     * A failure of the batch-update check is only logged; the database creation is attempted anyway.
     */
    @PostConstruct
    public void init() {
        logger.info("Max available memory to the Controller: {} bytes.", Runtime.getRuntime().maxMemory());

        // The connection is borrowed only for this metadata check, so it is closed right afterwards.
        try ( Connection connection = jdbcTemplate.getDataSource().getConnection() ) {
            if ( !connection.getMetaData().supportsBatchUpdates() )
                logger.warn("The database does not support \"BatchUpdates\"!");
        } catch (Exception e) {
            logger.error("Error testing if database supports batch updates", e);
        }

        createDatabase();
    }


    /**
     * Creates the database and the tables, if they do not exist, and computes the statistics of each table.
     * The tables "publication", "publication_pids", "publication_urls" and "datasource" are created
     * from the tables of the same name in {@link #oldDatabaseName}. The "current_assignment" table is dropped, if it exists.
     */
    private void createDatabase() {
        logger.info("Going to create the database and the tables, if they do not exist. Also will fill some tables with data from OpenAIRE.");

        jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + databaseName);

        copyTableFromOldDatabase("publication");
        copyTableFromOldDatabase("publication_pids");
        copyTableFromOldDatabase("publication_urls");
        copyTableFromOldDatabase("datasource");

        createEmptyTable("assignment", "id string, original_url string, workerid string, `date` timestamp");

        jdbcTemplate.execute("DROP TABLE IF EXISTS " + databaseName + ".current_assignment PURGE");

        createEmptyTable("attempt", "id string, original_url string, `date` timestamp, status string, error_class string, error_message string");
        createEmptyTable("payload", "id string, original_url string, actual_url string, `date` timestamp, mimetype string, size string, `hash` string, `location` string, provenance string");

        logger.info("The database \"{}\" and its tables were created or validated.", databaseName);
    }


    /** Creates the given table, if missing, as a parquet copy of the same-named table of the old database and computes its stats. */
    private void copyTableFromOldDatabase(String tableName) {
        jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + "." + tableName + " stored as parquet as select * from " + oldDatabaseName + "." + tableName);
        computeStats(tableName);
    }


    /** Creates the given parquet table with the given column definitions, if missing, and computes its stats. */
    private void createEmptyTable(String tableName, String columnDefinitions) {
        jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + databaseName + "." + tableName + " (" + columnDefinitions + ") stored as parquet");
        computeStats(tableName);
    }


    /** Issues "COMPUTE STATS" for the given table of the working database. */
    private void computeStats(String tableName) {
        jdbcTemplate.execute("COMPUTE STATS " + databaseName + "." + tableName);
    }


    /**
     * Logs a failure related to a prepared statement, together with the query and the exception, and returns the error message.
     *
     * @param queryName the name of the query; when it starts with "get", the message mentions only the creation
     *                  of the statement, otherwise (including {@code null}) it mentions creation and execution
     * @param query the query text, which is appended to the logged message
     * @param e the exception to log
     * @return the error message (without the query), ending with a newline
     */
    public static String handlePreparedStatementException(String queryName, String query, Exception e) {
        // A null name must not make the error handler itself fail.
        boolean isGetQuery = (queryName != null) && queryName.startsWith("get");
        String errorMsg = "Problem when creating " + (isGetQuery ? "" : "and executing ") + "the prepared statement for \"" + queryName + "\"!\n";
        logger.error(errorMsg + "\n\n" + query + "\n\n", e);
        return errorMsg;
    }
}