- Workaround a bug of Impala-JDBC-Driver, when creating insert-prepared-statements.
- Update dependencies.
This commit is contained in:
parent
33ba3e8d91
commit
82bf11b9b3
|
@ -1,5 +1,5 @@
|
|||
plugins {
|
||||
id 'org.springframework.boot' version '2.6.1'
|
||||
id 'org.springframework.boot' version '2.6.2'
|
||||
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
|
||||
id 'java'
|
||||
}
|
||||
|
@ -43,6 +43,9 @@ dependencies {
|
|||
// https://mvnrepository.com/artifact/com.google.guava/guava
|
||||
implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre'
|
||||
|
||||
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
||||
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
||||
|
||||
implementation 'io.minio:minio:8.3.4'
|
||||
|
||||
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
|
|
|
@ -8,7 +8,7 @@ elif [[ $# -gt 1 ]]; then
|
|||
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
|
||||
fi
|
||||
|
||||
gradleVersion="7.3.2"
|
||||
gradleVersion="7.3.3"
|
||||
|
||||
if [[ justInstall -eq 0 ]]; then
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
|
|||
import eu.openaire.urls_controller.util.ControllerConstants;
|
||||
import eu.openaire.urls_controller.util.FileUtils;
|
||||
import eu.openaire.urls_controller.util.GenericUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.HttpStatus;
|
||||
|
@ -306,21 +307,21 @@ public class UrlController {
|
|||
ImpalaConnector.databaseLock.lock();
|
||||
|
||||
// Store the workerReport into the database.
|
||||
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, date, mimetype, size, hash, location, provenance) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
|
||||
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)";
|
||||
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, `date`, mimetype, size, `hash`, `location`, provenance) VALUES ";
|
||||
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, `date`, status, error_class, error_message) VALUES ";
|
||||
|
||||
String tempInsertQueryName = null;
|
||||
PreparedStatement preparedInsertPayloadStatement = null, preparedInsertAttemptStatement = null;
|
||||
Statement insertPayloadStatement = null, insertAttemptStatement = null;
|
||||
try {
|
||||
tempInsertQueryName = "insertIntoPayloadBaseQuery";
|
||||
preparedInsertPayloadStatement = con.prepareStatement(insertIntoPayloadBaseQuery);
|
||||
insertPayloadStatement = con.createStatement();
|
||||
tempInsertQueryName = "insertIntoAttemptBaseQuery";
|
||||
preparedInsertAttemptStatement = con.prepareStatement(insertIntoAttemptBaseQuery);
|
||||
insertAttemptStatement = con.createStatement();
|
||||
} catch (SQLException sqle) {
|
||||
ImpalaConnector.databaseLock.unlock();
|
||||
String errorMsg = "Problem when creating the prepared statement for \"" + tempInsertQueryName + "\"!\n";
|
||||
String errorMsg = "Problem when creating the statement for \"" + tempInsertQueryName + "\"!\n";
|
||||
logger.error(errorMsg + sqle.getMessage());
|
||||
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con);
|
||||
closeStatements(insertPayloadStatement, insertAttemptStatement, con);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||
}
|
||||
|
||||
|
@ -330,13 +331,17 @@ public class UrlController {
|
|||
ImpalaConnector.databaseLock.unlock();
|
||||
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n";
|
||||
logger.error(errorMsg + sqle.getMessage());
|
||||
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con);
|
||||
closeStatements(insertPayloadStatement, insertAttemptStatement, con);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||
}
|
||||
|
||||
String payloadErrorMsg = null;
|
||||
int failedCount = 0;
|
||||
|
||||
// TODO - Think about handling this loop with multiple threads..
|
||||
// The Impala-server will handle the synchronization itself..
|
||||
// Check online what happens with "statement.setPoolable()" does it improves speed? in multi or also in single thread?
|
||||
|
||||
for ( UrlReport urlReport : urlReports ) {
|
||||
Payload payload = urlReport.getPayload();
|
||||
if ( payload == null ) {
|
||||
|
@ -345,26 +350,20 @@ public class UrlController {
|
|||
continue;
|
||||
}
|
||||
|
||||
try { // We use a "PreparedStatement" to do insertions, for security reasons.
|
||||
preparedInsertPayloadStatement.setString(1, payload.getId());
|
||||
preparedInsertPayloadStatement.setString(2, payload.getOriginal_url());
|
||||
preparedInsertPayloadStatement.setString(3, payload.getActual_url());
|
||||
preparedInsertPayloadStatement.setTimestamp(4, payload.getTimestamp_acquired());
|
||||
preparedInsertPayloadStatement.setString(5, payload.getMime_type());
|
||||
|
||||
try {
|
||||
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
|
||||
String stringSize = null;
|
||||
Long size = payload.getSize();
|
||||
if ( size != null )
|
||||
stringSize = String.valueOf(size);
|
||||
|
||||
preparedInsertPayloadStatement.setString(6, stringSize);
|
||||
preparedInsertPayloadStatement.setString(7, payload.getHash());
|
||||
preparedInsertPayloadStatement.setString(8, payload.getLocation());
|
||||
preparedInsertPayloadStatement.setString(9, payload.getProvenance());
|
||||
preparedInsertPayloadStatement.executeUpdate();
|
||||
String insertIntoPayloadFullQuery = insertIntoPayloadBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','" + payload.getActual_url() + "','"
|
||||
+ payload.getTimestamp_acquired() + "','" + payload.getMime_type() + "','" + stringSize + "','" + payload.getHash() + "','"
|
||||
+ payload.getLocation() + "','" + payload.getProvenance() + "')";
|
||||
|
||||
insertPayloadStatement.execute(insertIntoPayloadFullQuery);
|
||||
} catch (SQLException sqle) {
|
||||
logger.error("Problem when executing the \"insertIntoPayloadBaseQuery\": " + sqle.getMessage() + "\n\n");
|
||||
logger.error("Problem when executing the \"insertIntoPayloadFullQuery\": " + sqle.getMessage() + "\n\n");
|
||||
}
|
||||
|
||||
Error error = urlReport.getError();
|
||||
|
@ -373,14 +372,15 @@ public class UrlController {
|
|||
error = new Error(null, null);
|
||||
}
|
||||
|
||||
try { // We use a "PreparedStatement" to do insertions, for security reasons.
|
||||
preparedInsertAttemptStatement.setString(1, payload.getId());
|
||||
preparedInsertAttemptStatement.setString(2, payload.getOriginal_url());
|
||||
preparedInsertAttemptStatement.setTimestamp(3, payload.getTimestamp_acquired());
|
||||
preparedInsertAttemptStatement.setString(4, urlReport.getStatus().toString());
|
||||
preparedInsertAttemptStatement.setString(5, String.valueOf(error.getType())); // This covers the case of "null".
|
||||
preparedInsertAttemptStatement.setString(6, error.getMessage());
|
||||
preparedInsertAttemptStatement.executeUpdate();
|
||||
try {
|
||||
String errorCause = error.getMessage();
|
||||
if ( errorCause != null )
|
||||
errorCause = StringUtils.replace(errorCause, "'", "\\'", -1); // Escape single quotes in the error-cause-message.
|
||||
|
||||
String insertIntoAttemptFullQuery = insertIntoAttemptBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','"
|
||||
+ payload.getTimestamp_acquired() + "','" + urlReport.getStatus().toString() + "','" + error.getType() + "','" + errorCause + "')";
|
||||
|
||||
insertAttemptStatement.execute(insertIntoAttemptFullQuery);
|
||||
} catch (SQLException sqle) {
|
||||
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n");
|
||||
}
|
||||
|
@ -394,7 +394,7 @@ public class UrlController {
|
|||
logger.error(errorMsg + "\n" + sqle.getMessage());
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||
} finally {
|
||||
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here!
|
||||
closeStatements(insertPayloadStatement, insertAttemptStatement, null); // Do not close the connection here!
|
||||
}
|
||||
|
||||
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables.");
|
||||
|
@ -470,17 +470,17 @@ public class UrlController {
|
|||
}
|
||||
|
||||
|
||||
private boolean closePreparedStatements(PreparedStatement preparedStatement1, PreparedStatement preparedStatement2, Connection con) {
|
||||
private boolean closeStatements(Statement statement1, Statement statement2, Connection con) {
|
||||
try {
|
||||
if ( preparedStatement1 != null )
|
||||
preparedStatement1.close();
|
||||
if ( preparedStatement2 != null )
|
||||
preparedStatement2.close();
|
||||
if ( statement1 != null )
|
||||
statement1.close();
|
||||
if ( statement2 != null )
|
||||
statement2.close();
|
||||
if ( con != null )
|
||||
con.close(); // It may have already closed and that's fine.
|
||||
return true;
|
||||
} catch (SQLException sqle) {
|
||||
logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage());
|
||||
logger.error("Could not close the statements or the connection with the Impala-database.\n" + sqle.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -168,9 +168,9 @@ public class FileUtils {
|
|||
try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) {
|
||||
if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data..
|
||||
fileLocation = resultSet.getString(1);
|
||||
if ( fileLocation != null ) { // If the full-text of this record is already-found.
|
||||
if ( fileLocation != null ) { // If the full-text of this record is already-found and uploaded.
|
||||
payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3.
|
||||
logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\".");
|
||||
//logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); // DEBUG!
|
||||
numFilesFoundFromPreviousAssignmentsBatches ++;
|
||||
continue;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue