- Fix a "databaseLock" bug, which could cause both the payload- and attempt-inserts, as well as the "mergeParquetFiles" operation, to fail, since the inserts could be executed concurrently with table-compaction.

- Fix the "null" representation of an "unknown" payload-size in the database.
- Remove the obsolete thread-locking from the "createDatabase" operation. This code is guaranteed to run BEFORE any other operation touches the database.
- Implement the "handlePreparedStatementException" and "closeConnection" methods.
- Improve error-logs.
- Update dependencies.
- Code cleanup.
This commit is contained in:
Lampros Smyrnaios 2021-11-30 13:26:19 +02:00
parent d100af35d0
commit 780ed15ce2
6 changed files with 85 additions and 80 deletions

View File

@ -1,5 +1,5 @@
plugins { plugins {
id 'org.springframework.boot' version '2.5.6' id 'org.springframework.boot' version '2.6.1'
id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'io.spring.dependency-management' version '1.0.11.RELEASE'
id 'java' id 'java'
} }

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-bin.zip
zipStoreBase=GRADLE_USER_HOME zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists zipStorePath=wrapper/dists

View File

@ -8,7 +8,7 @@ elif [[ $# -gt 1 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1 echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
fi fi
gradleVersion="7.2" gradleVersion="7.3"
if [[ justInstall -eq 0 ]]; then if [[ justInstall -eq 0 ]]; then

View File

@ -41,22 +41,6 @@ public final class ImpalaConnector {
} }
public HikariDataSource impalaDS() throws SQLException, PropertyVetoException
{
HikariConfig hikariConfig = new HikariConfig();
hikariConfig.setDriverClassName(ImpalaConnector.impalaDriver);
hikariConfig.setAutoCommit(true);
hikariConfig.setJdbcUrl(ImpalaConnector.impalaConnectionUrl);
hikariConfig.setPoolName(poolName);
hikariConfig.setMaximumPoolSize(hikariMaxConnectionPoolSize);
hikariConfig.setMaxLifetime(hikariMaxLifetime);
hikariConfig.setMinimumIdle(hikariMinIdleConnections);
hikariConfig.setConnectionTimeout(hikariConnectionTimeOut);
hikariConfig.setIdleTimeout(hikariIdleTimeOut);
return new HikariDataSource(hikariConfig);
}
public ImpalaConnector() public ImpalaConnector()
{ {
logger.info("Max available memory to the Controller: " + Runtime.getRuntime().maxMemory() + " bytes."); logger.info("Max available memory to the Controller: " + Runtime.getRuntime().maxMemory() + " bytes.");
@ -81,36 +65,49 @@ public final class ImpalaConnector {
} else } else
throw new RuntimeException("The \"impalaDriver\" was null or empty!"); throw new RuntimeException("The \"impalaDriver\" was null or empty!");
} catch(Exception e) { } catch(Exception e) {
logger.error("Error when loading the database properties!\n" + e.getMessage()); String errorMsg = "Error when loading the database properties!\n" + e.getMessage();
e.printStackTrace(); logger.error(errorMsg, e);
System.err.println(errorMsg);
System.exit(11); System.exit(11);
} }
try { try {
hikariDataSource = impalaDS(); hikariDataSource = impalaDS();
} catch (SQLException | PropertyVetoException e) { } catch (SQLException | PropertyVetoException e) {
logger.error("Problem when creating the Hikari connection pool!"); logger.error("Problem when creating the Hikari connection pool!", e);
e.printStackTrace();
} }
createDatabase(); createDatabase();
} }
public HikariDataSource impalaDS() throws SQLException, PropertyVetoException
{
HikariConfig hikariConfig = new HikariConfig();
hikariConfig.setDriverClassName(ImpalaConnector.impalaDriver);
hikariConfig.setAutoCommit(true);
hikariConfig.setJdbcUrl(ImpalaConnector.impalaConnectionUrl);
hikariConfig.setPoolName(poolName);
hikariConfig.setMaximumPoolSize(hikariMaxConnectionPoolSize);
hikariConfig.setMaxLifetime(hikariMaxLifetime);
hikariConfig.setMinimumIdle(hikariMinIdleConnections);
hikariConfig.setConnectionTimeout(hikariConnectionTimeOut);
hikariConfig.setIdleTimeout(hikariIdleTimeOut);
return new HikariDataSource(hikariConfig);
}
public void createDatabase() public void createDatabase()
{ {
databaseLock.lock(); // Make sure the database and tables are created before the workers can request assignments.
Connection con = getConnection(); Connection con = getConnection();
if ( con == null ) { if ( con == null )
databaseLock.unlock();
System.exit(22); System.exit(22);
}
try { try {
if ( !con.getMetaData().supportsBatchUpdates() ) if ( !con.getMetaData().supportsBatchUpdates() )
logger.warn("The database does not support \"BatchUpdates\"!"); logger.warn("The database does not support \"BatchUpdates\"!");
} catch (SQLException e) { } catch (SQLException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
logger.info("Going to create the database and the tables, if they do not exist. Also will fill some tables with data from OpenAIRE."); logger.info("Going to create the database and the tables, if they do not exist. Also will fill some tables with data from OpenAIRE.");
@ -119,12 +116,7 @@ public final class ImpalaConnector {
statement = con.createStatement(); statement = con.createStatement();
} catch (SQLException sqle) { } catch (SQLException sqle) {
logger.error("Problem when creating a connection-statement!\n" + sqle.getMessage()); logger.error("Problem when creating a connection-statement!\n" + sqle.getMessage());
try { ImpalaConnector.closeConnection(con);
con.close();
} catch (SQLException sqle2) {
logger.error("Could not close the connection with the Impala-database.\n" + sqle2.getMessage());
}
databaseLock.unlock();
System.exit(33); System.exit(33);
} }
@ -152,11 +144,11 @@ public final class ImpalaConnector {
statement.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".payload (id string, original_url string, actual_url string, `date` timestamp, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet"); statement.execute("CREATE TABLE IF NOT EXISTS " + databaseName + ".payload (id string, original_url string, actual_url string, `date` timestamp, mimetype string, size string, `hash` string, `location` string, provenance string) stored as parquet");
statement.execute("COMPUTE STATS " + databaseName + ".payload"); statement.execute("COMPUTE STATS " + databaseName + ".payload");
} catch (SQLException sqle) { } catch (SQLException sqle) {
logger.error("Problem when executing the \"create database and create tables queries!\n" + sqle.getMessage() + "\nSQL state: " + sqle.getSQLState() + "\nError code: " + sqle.getErrorCode()); String errorMsg = "Problem when executing the \"create database and create tables queries!\n" + sqle.getMessage() + "\nSQL state: " + sqle.getSQLState() + "\nError code: " + sqle.getErrorCode();
sqle.printStackTrace(); logger.error(errorMsg, sqle);
System.err.println(errorMsg);
System.exit(44); System.exit(44);
} finally { } finally {
databaseLock.unlock();
try { try {
statement.close(); statement.close();
con.close(); con.close();
@ -222,7 +214,7 @@ public final class ImpalaConnector {
//logger.debug(responseEntity.toString()); //logger.debug(responseEntity.toString());
} catch (SQLException sqle) { } catch (SQLException sqle) {
sqle.printStackTrace(); logger.error(sqle.getMessage(), sqle);
return false; return false;
} finally { } finally {
try { try {
@ -236,4 +228,25 @@ public final class ImpalaConnector {
return true; return true;
} }
public static boolean closeConnection(Connection con) {
try {
if ( con != null )
con.close(); // It may have already closed and that's fine.
return true;
} catch (SQLException sqle) {
logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage());
return false;
}
}
public static String handlePreparedStatementException(String queryName, String query, String preparedStatementName, PreparedStatement preparedStatement, Connection con, Exception e)
{
String errorMsg = "Problem when creating " + (( ! queryName.startsWith("get")) ? "and executing " : "") + "the prepared statement for \"" + queryName + "\"!\n";
logger.error(errorMsg + "\n\n" + query + "\n\n" + e.getMessage(), e);
closeConnection(con);
return errorMsg;
}
} }

View File

@ -5,10 +5,8 @@ import eu.openaire.urls_controller.configuration.ImpalaConnector;
import eu.openaire.urls_controller.models.Error; import eu.openaire.urls_controller.models.Error;
import eu.openaire.urls_controller.models.*; import eu.openaire.urls_controller.models.*;
import eu.openaire.urls_controller.payloads.requests.WorkerReport; import eu.openaire.urls_controller.payloads.requests.WorkerReport;
import eu.openaire.urls_controller.payloads.responces.AssignmentResponse; import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
import eu.openaire.urls_controller.util.ControllerConstants; import eu.openaire.urls_controller.util.*;
import eu.openaire.urls_controller.util.FileUtils;
import eu.openaire.urls_controller.util.GenericUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus; import org.springframework.http.HttpStatus;
@ -137,8 +135,7 @@ public class UrlController {
datasource.setId(resultSet.getString(3)); datasource.setId(resultSet.getString(3));
datasource.setName(resultSet.getString(4)); datasource.setName(resultSet.getString(4));
} catch (SQLException sqle) { } catch (SQLException sqle) {
logger.error("No value was able to be retrieved from one of the columns of row_" + resultSet.getRow()); logger.error("No value was able to be retrieved from one of the columns of row_" + resultSet.getRow(), sqle);
sqle.printStackTrace();
continue; // This object is broken, move to the next row. continue; // This object is broken, move to the next row.
} }
assignment.setDatasource(datasource); assignment.setDatasource(datasource);
@ -262,8 +259,8 @@ public class UrlController {
closePreparedStatements(preparedInsertAssignmentStatement, null, con); closePreparedStatements(preparedInsertAssignmentStatement, null, con);
} }
logger.info("Sending batch_" + assignmentsBatchCounter.incrementAndGet() + " with " + assignmentsSize + " assignments to worker with ID: " + workerId + "."); logger.info("Sending batch-assignments_" + assignmentsBatchCounter.incrementAndGet() + " with " + assignmentsSize + " assignments to worker with ID: " + workerId + ".");
return ResponseEntity.status(HttpStatus.OK).body(new AssignmentResponse(assignmentsBatchCounter.get(), assignments)); return ResponseEntity.status(HttpStatus.OK).body(new AssignmentsResponse(assignmentsBatchCounter.get(), assignments));
} }
@ -286,9 +283,7 @@ public class UrlController {
logger.info("Received the WorkerReport for batch_ " + workerReport.getAssignmentRequestCounter() + ", from the worker with id: " + workerReport.getWorkerId() + ". It contains " + urlReports.size() + " urlReports. Going to insert them into the database."); logger.info("Received the WorkerReport for batch_ " + workerReport.getAssignmentRequestCounter() + ", from the worker with id: " + workerReport.getWorkerId() + ". It contains " + urlReports.size() + " urlReports. Going to insert them into the database.");
// TODO - The Controller will have to request the files from the Worker, in order to upload them to the S3, in the future. ImpalaConnector.databaseLock.lock();
// We will have to UPDATE the "location" of each of those files in the UrlReports and then insert them all into the database.
Connection con = ImpalaConnector.getInstance().getConnection(); Connection con = ImpalaConnector.getInstance().getConnection();
if ( con == null ) if ( con == null )
@ -313,7 +308,7 @@ public class UrlController {
} }
try { try {
con.setAutoCommit(false); con.setAutoCommit(false); // Avoid writing to disk for each insert. Write them all in the end.
} catch (SQLException sqle) { } catch (SQLException sqle) {
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!"; String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!";
logger.error(errorMsg + "\n" + sqle.getMessage()); logger.error(errorMsg + "\n" + sqle.getMessage());
@ -327,24 +322,26 @@ public class UrlController {
for ( UrlReport urlReport : urlReports ) { for ( UrlReport urlReport : urlReports ) {
Payload payload = urlReport.getPayload(); Payload payload = urlReport.getPayload();
if ( payload == null ) { if ( payload == null ) {
logger.error("Payload was \"null\" for a \"urlReport\"!"); logger.error("Payload was \"null\" for a \"urlReport\", in assignments_" + assignmentsBatchCounter);
// TODO - A bit rare to happen.. but what should we do? (otherwise an NPE will be thrown later)
// We can't just create an empty object and proceed, since the payload is as important as the whole row to be inserted..
// TODO - Just add it in an errorMessage to be returned in the end. Should it return HTTP-200 but with a small error message along?
payloadErrorMsg = (++failedCount) + " urlReports failed to be processed because they had no payload!"; payloadErrorMsg = (++failedCount) + " urlReports failed to be processed because they had no payload!";
continue; continue;
} }
String tempFullQueryString = null; String tempFullQueryString = null;
try { // We use a "PreparedStatement" to do insertions, for security reasons. try { // We use a "PreparedStatement" to do insertions, for security reasons.
preparedInsertPayloadStatement.setString(1, payload.getId()); preparedInsertPayloadStatement.setString(1, payload.getId());
preparedInsertPayloadStatement.setString(2, payload.getOriginal_url()); preparedInsertPayloadStatement.setString(2, payload.getOriginal_url());
preparedInsertPayloadStatement.setString(3, payload.getActual_url()); preparedInsertPayloadStatement.setString(3, payload.getActual_url());
preparedInsertPayloadStatement.setDate(4, payload.getDate_acquired()); preparedInsertPayloadStatement.setDate(4, payload.getDate_acquired());
preparedInsertPayloadStatement.setString(5, payload.getMime_type()); preparedInsertPayloadStatement.setString(5, payload.getMime_type());
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers. // The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
preparedInsertPayloadStatement.setString(6, String.valueOf(payload.getSize())); String stringSize = null;
Long size = payload.getSize();
if ( size != null )
stringSize = String.valueOf(size);
preparedInsertPayloadStatement.setString(6, stringSize);
preparedInsertPayloadStatement.setString(7, payload.getHash()); preparedInsertPayloadStatement.setString(7, payload.getHash());
preparedInsertPayloadStatement.setString(8, payload.getLocation()); preparedInsertPayloadStatement.setString(8, payload.getLocation());
preparedInsertPayloadStatement.setString(9, payload.getProvenance()); preparedInsertPayloadStatement.setString(9, payload.getProvenance());
@ -374,13 +371,12 @@ public class UrlController {
} }
}//end for-loop }//end for-loop
ImpalaConnector.databaseLock.lock();
try { try {
con.commit(); // Send all the insert-queries to the database. con.commit(); // Commit all the insert-queries to the database (write them to disk).
} catch (SQLException sqle) { } catch (SQLException sqle) {
ImpalaConnector.databaseLock.unlock();
String errorMsg = "Problem when committing changes to the database or when setting Connection.AutoCommit to \"true\"!"; String errorMsg = "Problem when committing changes to the database or when setting Connection.AutoCommit to \"true\"!";
logger.error(errorMsg + "\n" + sqle.getMessage()); logger.error(errorMsg + "\n" + sqle.getMessage());
ImpalaConnector.databaseLock.unlock();
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} finally { } finally {
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here! closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here!
@ -391,21 +387,19 @@ public class UrlController {
String mergeErrorMsg = mergeParquetFiles("payload", con); String mergeErrorMsg = mergeParquetFiles("payload", con);
if ( mergeErrorMsg != null ) { if ( mergeErrorMsg != null ) {
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
try { con.close(); } ImpalaConnector.closeConnection(con);
catch (SQLException sqle) { logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage()); }
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg);
} }
mergeErrorMsg = mergeParquetFiles("attempt", con); mergeErrorMsg = mergeParquetFiles("attempt", con);
if ( mergeErrorMsg != null ) { if ( mergeErrorMsg != null ) {
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
try { con.close(); } ImpalaConnector.closeConnection(con);
catch (SQLException sqle) { logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage()); }
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(mergeErrorMsg);
} }
try { try {
con.commit(); // Apply the merges. con.commit(); // Apply the merges permanently (write them to disk).
con.setAutoCommit(true); // Restore the "auto-commit" value for this connection of the pool. con.setAutoCommit(true); // Restore the "auto-commit" value for this connection of the pool.
} catch (SQLException sqle) { } catch (SQLException sqle) {
String errorMsg = "Problem when committing changes to the database!"; String errorMsg = "Problem when committing changes to the database!";
@ -414,8 +408,7 @@ public class UrlController {
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} finally { } finally {
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
try { con.close(); } ImpalaConnector.closeConnection(con);
catch (SQLException sqle) { logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage()); }
} }
return ResponseEntity.status(HttpStatus.OK).body(payloadErrorMsg); return ResponseEntity.status(HttpStatus.OK).body(payloadErrorMsg);
@ -516,8 +509,7 @@ public class UrlController {
if ( con != null ) if ( con != null )
con.close(); // It may have already closed and that's fine. con.close(); // It may have already closed and that's fine.
return true; return true;
} } catch (SQLException sqle) {
catch (SQLException sqle) {
logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage()); logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage());
return false; return false;
} }
@ -576,7 +568,7 @@ public class UrlController {
logger.info("Sending batch_" + assignmentsBatchCounter.incrementAndGet() + " with " + assignments.size() + " assignments (" + FileUtils.duplicateIdUrlEntries.get() + " more assignments were discarded as duplicates), to worker with ID: " + workerId); logger.info("Sending batch_" + assignmentsBatchCounter.incrementAndGet() + " with " + assignments.size() + " assignments (" + FileUtils.duplicateIdUrlEntries.get() + " more assignments were discarded as duplicates), to worker with ID: " + workerId);
return ResponseEntity.status(HttpStatus.OK).header("Content-Type", "application/json").body(new AssignmentResponse(assignmentsBatchCounter.get(), assignments)); return ResponseEntity.status(HttpStatus.OK).header("Content-Type", "application/json").body(new AssignmentsResponse(assignmentsBatchCounter.get(), assignments));
} }
} }

View File

@ -8,26 +8,26 @@ import java.util.List;
@JsonInclude(JsonInclude.Include.NON_NULL) @JsonInclude(JsonInclude.Include.NON_NULL)
public class AssignmentResponse { public class AssignmentsResponse {
@JsonProperty("assignmentCounter") @JsonProperty("assignmentsCounter")
private Long assignmentCounter; private Long assignmentsCounter;
@JsonProperty("assignments") @JsonProperty("assignments")
private List<Assignment> assignments; private List<Assignment> assignments;
public AssignmentResponse(Long assignmentCounter, List<Assignment> assignments) { public AssignmentsResponse(Long assignmentCounter, List<Assignment> assignments) {
this.assignmentCounter = assignmentCounter; this.assignmentsCounter = assignmentCounter;
this.assignments = assignments; this.assignments = assignments;
} }
public Long getAssignmentCounter() { public Long getAssignmentsCounter() {
return assignmentCounter; return assignmentsCounter;
} }
public void setAssignmentCounter(Long assignmentCounter) { public void setAssignmentsCounter(Long assignmentsCounter) {
this.assignmentCounter = assignmentCounter; this.assignmentsCounter = assignmentsCounter;
} }
public List<Assignment> getAssignments() { public List<Assignment> getAssignments() {
@ -41,8 +41,8 @@ public class AssignmentResponse {
@Override @Override
public String toString() { public String toString() {
return "AssignmentResponse{" + return "AssignmentsResponse{" +
"assignmentCounter=" + assignmentCounter + "assignmentsCounter=" + assignmentsCounter +
", assignments=" + assignments + ", assignments=" + assignments +
'}'; '}';
} }