- Work around a bug in the Impala JDBC driver that occurs when creating prepared statements for insertions.
- Update dependencies.
This commit is contained in:
parent
33ba3e8d91
commit
82bf11b9b3
|
@ -1,5 +1,5 @@
|
||||||
plugins {
|
plugins {
|
||||||
id 'org.springframework.boot' version '2.6.1'
|
id 'org.springframework.boot' version '2.6.2'
|
||||||
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
|
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
|
||||||
id 'java'
|
id 'java'
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,9 @@ dependencies {
|
||||||
// https://mvnrepository.com/artifact/com.google.guava/guava
|
// https://mvnrepository.com/artifact/com.google.guava/guava
|
||||||
implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre'
|
implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre'
|
||||||
|
|
||||||
|
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
||||||
|
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
||||||
|
|
||||||
implementation 'io.minio:minio:8.3.4'
|
implementation 'io.minio:minio:8.3.4'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
|
|
|
@ -8,7 +8,7 @@ elif [[ $# -gt 1 ]]; then
|
||||||
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
|
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gradleVersion="7.3.2"
|
gradleVersion="7.3.3"
|
||||||
|
|
||||||
if [[ justInstall -eq 0 ]]; then
|
if [[ justInstall -eq 0 ]]; then
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
|
||||||
import eu.openaire.urls_controller.util.ControllerConstants;
|
import eu.openaire.urls_controller.util.ControllerConstants;
|
||||||
import eu.openaire.urls_controller.util.FileUtils;
|
import eu.openaire.urls_controller.util.FileUtils;
|
||||||
import eu.openaire.urls_controller.util.GenericUtils;
|
import eu.openaire.urls_controller.util.GenericUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.http.HttpStatus;
|
import org.springframework.http.HttpStatus;
|
||||||
|
@ -306,21 +307,21 @@ public class UrlController {
|
||||||
ImpalaConnector.databaseLock.lock();
|
ImpalaConnector.databaseLock.lock();
|
||||||
|
|
||||||
// Store the workerReport into the database.
|
// Store the workerReport into the database.
|
||||||
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, date, mimetype, size, hash, location, provenance) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
|
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, `date`, mimetype, size, `hash`, `location`, provenance) VALUES ";
|
||||||
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)";
|
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, `date`, status, error_class, error_message) VALUES ";
|
||||||
|
|
||||||
String tempInsertQueryName = null;
|
String tempInsertQueryName = null;
|
||||||
PreparedStatement preparedInsertPayloadStatement = null, preparedInsertAttemptStatement = null;
|
Statement insertPayloadStatement = null, insertAttemptStatement = null;
|
||||||
try {
|
try {
|
||||||
tempInsertQueryName = "insertIntoPayloadBaseQuery";
|
tempInsertQueryName = "insertIntoPayloadBaseQuery";
|
||||||
preparedInsertPayloadStatement = con.prepareStatement(insertIntoPayloadBaseQuery);
|
insertPayloadStatement = con.createStatement();
|
||||||
tempInsertQueryName = "insertIntoAttemptBaseQuery";
|
tempInsertQueryName = "insertIntoAttemptBaseQuery";
|
||||||
preparedInsertAttemptStatement = con.prepareStatement(insertIntoAttemptBaseQuery);
|
insertAttemptStatement = con.createStatement();
|
||||||
} catch (SQLException sqle) {
|
} catch (SQLException sqle) {
|
||||||
ImpalaConnector.databaseLock.unlock();
|
ImpalaConnector.databaseLock.unlock();
|
||||||
String errorMsg = "Problem when creating the prepared statement for \"" + tempInsertQueryName + "\"!\n";
|
String errorMsg = "Problem when creating the statement for \"" + tempInsertQueryName + "\"!\n";
|
||||||
logger.error(errorMsg + sqle.getMessage());
|
logger.error(errorMsg + sqle.getMessage());
|
||||||
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con);
|
closeStatements(insertPayloadStatement, insertAttemptStatement, con);
|
||||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -330,13 +331,17 @@ public class UrlController {
|
||||||
ImpalaConnector.databaseLock.unlock();
|
ImpalaConnector.databaseLock.unlock();
|
||||||
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n";
|
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n";
|
||||||
logger.error(errorMsg + sqle.getMessage());
|
logger.error(errorMsg + sqle.getMessage());
|
||||||
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con);
|
closeStatements(insertPayloadStatement, insertAttemptStatement, con);
|
||||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
String payloadErrorMsg = null;
|
String payloadErrorMsg = null;
|
||||||
int failedCount = 0;
|
int failedCount = 0;
|
||||||
|
|
||||||
|
// TODO - Think about handling this loop with multiple threads..
|
||||||
|
// The Impala-server will handle the synchronization itself..
|
||||||
|
// Check online what "statement.setPoolable()" does: does it improve speed? Only with multiple threads, or also with a single thread?
|
||||||
|
|
||||||
for ( UrlReport urlReport : urlReports ) {
|
for ( UrlReport urlReport : urlReports ) {
|
||||||
Payload payload = urlReport.getPayload();
|
Payload payload = urlReport.getPayload();
|
||||||
if ( payload == null ) {
|
if ( payload == null ) {
|
||||||
|
@ -345,26 +350,20 @@ public class UrlController {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
try { // We use a "PreparedStatement" to do insertions, for security reasons.
|
try {
|
||||||
preparedInsertPayloadStatement.setString(1, payload.getId());
|
|
||||||
preparedInsertPayloadStatement.setString(2, payload.getOriginal_url());
|
|
||||||
preparedInsertPayloadStatement.setString(3, payload.getActual_url());
|
|
||||||
preparedInsertPayloadStatement.setTimestamp(4, payload.getTimestamp_acquired());
|
|
||||||
preparedInsertPayloadStatement.setString(5, payload.getMime_type());
|
|
||||||
|
|
||||||
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
|
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
|
||||||
String stringSize = null;
|
String stringSize = null;
|
||||||
Long size = payload.getSize();
|
Long size = payload.getSize();
|
||||||
if ( size != null )
|
if ( size != null )
|
||||||
stringSize = String.valueOf(size);
|
stringSize = String.valueOf(size);
|
||||||
|
|
||||||
preparedInsertPayloadStatement.setString(6, stringSize);
|
String insertIntoPayloadFullQuery = insertIntoPayloadBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','" + payload.getActual_url() + "','"
|
||||||
preparedInsertPayloadStatement.setString(7, payload.getHash());
|
+ payload.getTimestamp_acquired() + "','" + payload.getMime_type() + "','" + stringSize + "','" + payload.getHash() + "','"
|
||||||
preparedInsertPayloadStatement.setString(8, payload.getLocation());
|
+ payload.getLocation() + "','" + payload.getProvenance() + "')";
|
||||||
preparedInsertPayloadStatement.setString(9, payload.getProvenance());
|
|
||||||
preparedInsertPayloadStatement.executeUpdate();
|
insertPayloadStatement.execute(insertIntoPayloadFullQuery);
|
||||||
} catch (SQLException sqle) {
|
} catch (SQLException sqle) {
|
||||||
logger.error("Problem when executing the \"insertIntoPayloadBaseQuery\": " + sqle.getMessage() + "\n\n");
|
logger.error("Problem when executing the \"insertIntoPayloadFullQuery\": " + sqle.getMessage() + "\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
Error error = urlReport.getError();
|
Error error = urlReport.getError();
|
||||||
|
@ -373,14 +372,15 @@ public class UrlController {
|
||||||
error = new Error(null, null);
|
error = new Error(null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
try { // We use a "PreparedStatement" to do insertions, for security reasons.
|
try {
|
||||||
preparedInsertAttemptStatement.setString(1, payload.getId());
|
String errorCause = error.getMessage();
|
||||||
preparedInsertAttemptStatement.setString(2, payload.getOriginal_url());
|
if ( errorCause != null )
|
||||||
preparedInsertAttemptStatement.setTimestamp(3, payload.getTimestamp_acquired());
|
errorCause = StringUtils.replace(errorCause, "'", "\\'", -1); // Escape single quotes in the error-cause-message.
|
||||||
preparedInsertAttemptStatement.setString(4, urlReport.getStatus().toString());
|
|
||||||
preparedInsertAttemptStatement.setString(5, String.valueOf(error.getType())); // This covers the case of "null".
|
String insertIntoAttemptFullQuery = insertIntoAttemptBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','"
|
||||||
preparedInsertAttemptStatement.setString(6, error.getMessage());
|
+ payload.getTimestamp_acquired() + "','" + urlReport.getStatus().toString() + "','" + error.getType() + "','" + errorCause + "')";
|
||||||
preparedInsertAttemptStatement.executeUpdate();
|
|
||||||
|
insertAttemptStatement.execute(insertIntoAttemptFullQuery);
|
||||||
} catch (SQLException sqle) {
|
} catch (SQLException sqle) {
|
||||||
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n");
|
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n");
|
||||||
}
|
}
|
||||||
|
@ -394,7 +394,7 @@ public class UrlController {
|
||||||
logger.error(errorMsg + "\n" + sqle.getMessage());
|
logger.error(errorMsg + "\n" + sqle.getMessage());
|
||||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
|
||||||
} finally {
|
} finally {
|
||||||
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here!
|
closeStatements(insertPayloadStatement, insertAttemptStatement, null); // Do not close the connection here!
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables.");
|
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables.");
|
||||||
|
@ -470,17 +470,17 @@ public class UrlController {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private boolean closePreparedStatements(PreparedStatement preparedStatement1, PreparedStatement preparedStatement2, Connection con) {
|
private boolean closeStatements(Statement statement1, Statement statement2, Connection con) {
|
||||||
try {
|
try {
|
||||||
if ( preparedStatement1 != null )
|
if ( statement1 != null )
|
||||||
preparedStatement1.close();
|
statement1.close();
|
||||||
if ( preparedStatement2 != null )
|
if ( statement2 != null )
|
||||||
preparedStatement2.close();
|
statement2.close();
|
||||||
if ( con != null )
|
if ( con != null )
|
||||||
con.close(); // It may have already closed and that's fine.
|
con.close(); // It may have already closed and that's fine.
|
||||||
return true;
|
return true;
|
||||||
} catch (SQLException sqle) {
|
} catch (SQLException sqle) {
|
||||||
logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage());
|
logger.error("Could not close the statements or the connection with the Impala-database.\n" + sqle.getMessage());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -168,9 +168,9 @@ public class FileUtils {
|
||||||
try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) {
|
try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) {
|
||||||
if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data..
|
if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data..
|
||||||
fileLocation = resultSet.getString(1);
|
fileLocation = resultSet.getString(1);
|
||||||
if ( fileLocation != null ) { // If the full-text of this record is already-found.
|
if ( fileLocation != null ) { // If the full-text of this record is already-found and uploaded.
|
||||||
payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3.
|
payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3.
|
||||||
logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\".");
|
//logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); // DEBUG!
|
||||||
numFilesFoundFromPreviousAssignmentsBatches ++;
|
numFilesFoundFromPreviousAssignmentsBatches ++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue