- Work around a bug in the Impala JDBC driver when creating prepared statements for insert queries.

- Update dependencies.
springify_project
Lampros Smyrnaios 2 years ago
parent 33ba3e8d91
commit 82bf11b9b3

@ -1,5 +1,5 @@
plugins { plugins {
id 'org.springframework.boot' version '2.6.1' id 'org.springframework.boot' version '2.6.2'
id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'io.spring.dependency-management' version '1.0.11.RELEASE'
id 'java' id 'java'
} }
@ -43,6 +43,9 @@ dependencies {
// https://mvnrepository.com/artifact/com.google.guava/guava // https://mvnrepository.com/artifact/com.google.guava/guava
implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre' implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre'
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
implementation 'io.minio:minio:8.3.4' implementation 'io.minio:minio:8.3.4'
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp // https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
zipStoreBase=GRADLE_USER_HOME zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists zipStorePath=wrapper/dists

@ -8,7 +8,7 @@ elif [[ $# -gt 1 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1 echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
fi fi
gradleVersion="7.3.2" gradleVersion="7.3.3"
if [[ justInstall -eq 0 ]]; then if [[ justInstall -eq 0 ]]; then

@ -9,6 +9,7 @@ import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
import eu.openaire.urls_controller.util.ControllerConstants; import eu.openaire.urls_controller.util.ControllerConstants;
import eu.openaire.urls_controller.util.FileUtils; import eu.openaire.urls_controller.util.FileUtils;
import eu.openaire.urls_controller.util.GenericUtils; import eu.openaire.urls_controller.util.GenericUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus; import org.springframework.http.HttpStatus;
@ -306,21 +307,21 @@ public class UrlController {
ImpalaConnector.databaseLock.lock(); ImpalaConnector.databaseLock.lock();
// Store the workerReport into the database. // Store the workerReport into the database.
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, date, mimetype, size, hash, location, provenance) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"; String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, `date`, mimetype, size, `hash`, `location`, provenance) VALUES ";
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)"; String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, `date`, status, error_class, error_message) VALUES ";
String tempInsertQueryName = null; String tempInsertQueryName = null;
PreparedStatement preparedInsertPayloadStatement = null, preparedInsertAttemptStatement = null; Statement insertPayloadStatement = null, insertAttemptStatement = null;
try { try {
tempInsertQueryName = "insertIntoPayloadBaseQuery"; tempInsertQueryName = "insertIntoPayloadBaseQuery";
preparedInsertPayloadStatement = con.prepareStatement(insertIntoPayloadBaseQuery); insertPayloadStatement = con.createStatement();
tempInsertQueryName = "insertIntoAttemptBaseQuery"; tempInsertQueryName = "insertIntoAttemptBaseQuery";
preparedInsertAttemptStatement = con.prepareStatement(insertIntoAttemptBaseQuery); insertAttemptStatement = con.createStatement();
} catch (SQLException sqle) { } catch (SQLException sqle) {
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
String errorMsg = "Problem when creating the prepared statement for \"" + tempInsertQueryName + "\"!\n"; String errorMsg = "Problem when creating the statement for \"" + tempInsertQueryName + "\"!\n";
logger.error(errorMsg + sqle.getMessage()); logger.error(errorMsg + sqle.getMessage());
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con); closeStatements(insertPayloadStatement, insertAttemptStatement, con);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} }
@ -330,13 +331,17 @@ public class UrlController {
ImpalaConnector.databaseLock.unlock(); ImpalaConnector.databaseLock.unlock();
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n"; String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n";
logger.error(errorMsg + sqle.getMessage()); logger.error(errorMsg + sqle.getMessage());
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con); closeStatements(insertPayloadStatement, insertAttemptStatement, con);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} }
String payloadErrorMsg = null; String payloadErrorMsg = null;
int failedCount = 0; int failedCount = 0;
// TODO - Consider handling this loop with multiple threads.
// The Impala-server will handle the synchronization itself.
// Check online what "statement.setPoolable()" does: does it improve speed? Only with multiple threads, or also with a single thread?
for ( UrlReport urlReport : urlReports ) { for ( UrlReport urlReport : urlReports ) {
Payload payload = urlReport.getPayload(); Payload payload = urlReport.getPayload();
if ( payload == null ) { if ( payload == null ) {
@ -345,26 +350,20 @@ public class UrlController {
continue; continue;
} }
try { // We use a "PreparedStatement" to do insertions, for security reasons. try {
preparedInsertPayloadStatement.setString(1, payload.getId());
preparedInsertPayloadStatement.setString(2, payload.getOriginal_url());
preparedInsertPayloadStatement.setString(3, payload.getActual_url());
preparedInsertPayloadStatement.setTimestamp(4, payload.getTimestamp_acquired());
preparedInsertPayloadStatement.setString(5, payload.getMime_type());
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers. // The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
String stringSize = null; String stringSize = null;
Long size = payload.getSize(); Long size = payload.getSize();
if ( size != null ) if ( size != null )
stringSize = String.valueOf(size); stringSize = String.valueOf(size);
preparedInsertPayloadStatement.setString(6, stringSize); String insertIntoPayloadFullQuery = insertIntoPayloadBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','" + payload.getActual_url() + "','"
preparedInsertPayloadStatement.setString(7, payload.getHash()); + payload.getTimestamp_acquired() + "','" + payload.getMime_type() + "','" + stringSize + "','" + payload.getHash() + "','"
preparedInsertPayloadStatement.setString(8, payload.getLocation()); + payload.getLocation() + "','" + payload.getProvenance() + "')";
preparedInsertPayloadStatement.setString(9, payload.getProvenance());
preparedInsertPayloadStatement.executeUpdate(); insertPayloadStatement.execute(insertIntoPayloadFullQuery);
} catch (SQLException sqle) { } catch (SQLException sqle) {
logger.error("Problem when executing the \"insertIntoPayloadBaseQuery\": " + sqle.getMessage() + "\n\n"); logger.error("Problem when executing the \"insertIntoPayloadFullQuery\": " + sqle.getMessage() + "\n\n");
} }
Error error = urlReport.getError(); Error error = urlReport.getError();
@ -373,14 +372,15 @@ public class UrlController {
error = new Error(null, null); error = new Error(null, null);
} }
try { // We use a "PreparedStatement" to do insertions, for security reasons. try {
preparedInsertAttemptStatement.setString(1, payload.getId()); String errorCause = error.getMessage();
preparedInsertAttemptStatement.setString(2, payload.getOriginal_url()); if ( errorCause != null )
preparedInsertAttemptStatement.setTimestamp(3, payload.getTimestamp_acquired()); errorCause = StringUtils.replace(errorCause, "'", "\\'", -1); // Escape single quotes in the error-cause-message.
preparedInsertAttemptStatement.setString(4, urlReport.getStatus().toString());
preparedInsertAttemptStatement.setString(5, String.valueOf(error.getType())); // This covers the case of "null". String insertIntoAttemptFullQuery = insertIntoAttemptBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','"
preparedInsertAttemptStatement.setString(6, error.getMessage()); + payload.getTimestamp_acquired() + "','" + urlReport.getStatus().toString() + "','" + error.getType() + "','" + errorCause + "')";
preparedInsertAttemptStatement.executeUpdate();
insertAttemptStatement.execute(insertIntoAttemptFullQuery);
} catch (SQLException sqle) { } catch (SQLException sqle) {
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n"); logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n");
} }
@ -394,7 +394,7 @@ public class UrlController {
logger.error(errorMsg + "\n" + sqle.getMessage()); logger.error(errorMsg + "\n" + sqle.getMessage());
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} finally { } finally {
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here! closeStatements(insertPayloadStatement, insertAttemptStatement, null); // Do not close the connection here!
} }
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables."); logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables.");
@ -470,17 +470,17 @@ public class UrlController {
} }
private boolean closePreparedStatements(PreparedStatement preparedStatement1, PreparedStatement preparedStatement2, Connection con) { private boolean closeStatements(Statement statement1, Statement statement2, Connection con) {
try { try {
if ( preparedStatement1 != null ) if ( statement1 != null )
preparedStatement1.close(); statement1.close();
if ( preparedStatement2 != null ) if ( statement2 != null )
preparedStatement2.close(); statement2.close();
if ( con != null ) if ( con != null )
con.close(); // It may have already closed and that's fine. con.close(); // It may have already closed and that's fine.
return true; return true;
} catch (SQLException sqle) { } catch (SQLException sqle) {
logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage()); logger.error("Could not close the statements or the connection with the Impala-database.\n" + sqle.getMessage());
return false; return false;
} }
} }

@ -168,9 +168,9 @@ public class FileUtils {
try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) { try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) {
if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data.. if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data..
fileLocation = resultSet.getString(1); fileLocation = resultSet.getString(1);
if ( fileLocation != null ) { // If the full-text of this record is already-found. if ( fileLocation != null ) { // If the full-text of this record is already-found and uploaded.
payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3. payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3.
logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); //logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); // DEBUG!
numFilesFoundFromPreviousAssignmentsBatches ++; numFilesFoundFromPreviousAssignmentsBatches ++;
continue; continue;
} }

Loading…
Cancel
Save