Browse Source

- Workaround a bug of Impala-JDBC-Driver, when creating insert-prepared-statements.

- Update dependencies.
springify_project
Lampros Smyrnaios 5 months ago
parent
commit
82bf11b9b3
  1. 5
      build.gradle
  2. 2
      gradle/wrapper/gradle-wrapper.properties
  3. 2
      installAndRun.sh
  4. 72
      src/main/java/eu/openaire/urls_controller/controllers/UrlController.java
  5. 4
      src/main/java/eu/openaire/urls_controller/util/FileUtils.java

5
build.gradle

@ -1,5 +1,5 @@
plugins {
id 'org.springframework.boot' version '2.6.1'
id 'org.springframework.boot' version '2.6.2'
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
id 'java'
}
@ -43,6 +43,9 @@ dependencies {
// https://mvnrepository.com/artifact/com.google.guava/guava
implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre'
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
implementation 'io.minio:minio:8.3.4'
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp

2
gradle/wrapper/gradle-wrapper.properties vendored

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

2
installAndRun.sh

@ -8,7 +8,7 @@ elif [[ $# -gt 1 ]]; then
echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh <justInstall: 0 | 1>"; exit 1
fi
gradleVersion="7.3.2"
gradleVersion="7.3.3"
if [[ justInstall -eq 0 ]]; then

72
src/main/java/eu/openaire/urls_controller/controllers/UrlController.java

@ -9,6 +9,7 @@ import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse;
import eu.openaire.urls_controller.util.ControllerConstants;
import eu.openaire.urls_controller.util.FileUtils;
import eu.openaire.urls_controller.util.GenericUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpStatus;
@ -306,21 +307,21 @@ public class UrlController {
ImpalaConnector.databaseLock.lock();
// Store the workerReport into the database.
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, date, mimetype, size, hash, location, provenance) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)";
String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, `date`, mimetype, size, `hash`, `location`, provenance) VALUES ";
String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, `date`, status, error_class, error_message) VALUES ";
String tempInsertQueryName = null;
PreparedStatement preparedInsertPayloadStatement = null, preparedInsertAttemptStatement = null;
Statement insertPayloadStatement = null, insertAttemptStatement = null;
try {
tempInsertQueryName = "insertIntoPayloadBaseQuery";
preparedInsertPayloadStatement = con.prepareStatement(insertIntoPayloadBaseQuery);
insertPayloadStatement = con.createStatement();
tempInsertQueryName = "insertIntoAttemptBaseQuery";
preparedInsertAttemptStatement = con.prepareStatement(insertIntoAttemptBaseQuery);
insertAttemptStatement = con.createStatement();
} catch (SQLException sqle) {
ImpalaConnector.databaseLock.unlock();
String errorMsg = "Problem when creating the prepared statement for \"" + tempInsertQueryName + "\"!\n";
String errorMsg = "Problem when creating the statement for \"" + tempInsertQueryName + "\"!\n";
logger.error(errorMsg + sqle.getMessage());
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con);
closeStatements(insertPayloadStatement, insertAttemptStatement, con);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
}
@ -330,13 +331,17 @@ public class UrlController {
ImpalaConnector.databaseLock.unlock();
String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n";
logger.error(errorMsg + sqle.getMessage());
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con);
closeStatements(insertPayloadStatement, insertAttemptStatement, con);
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
}
String payloadErrorMsg = null;
int failedCount = 0;
// TODO - Think about handling this loop with multiple threads..
// The Impala-server will handle the synchronization itself..
// Check online what happens with "statement.setPoolable()" does it improves speed? in multi or also in single thread?
for ( UrlReport urlReport : urlReports ) {
Payload payload = urlReport.getPayload();
if ( payload == null ) {
@ -345,26 +350,20 @@ public class UrlController {
continue;
}
try { // We use a "PreparedStatement" to do insertions, for security reasons.
preparedInsertPayloadStatement.setString(1, payload.getId());
preparedInsertPayloadStatement.setString(2, payload.getOriginal_url());
preparedInsertPayloadStatement.setString(3, payload.getActual_url());
preparedInsertPayloadStatement.setTimestamp(4, payload.getTimestamp_acquired());
preparedInsertPayloadStatement.setString(5, payload.getMime_type());
try {
// The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers.
String stringSize = null;
Long size = payload.getSize();
if ( size != null )
stringSize = String.valueOf(size);
preparedInsertPayloadStatement.setString(6, stringSize);
preparedInsertPayloadStatement.setString(7, payload.getHash());
preparedInsertPayloadStatement.setString(8, payload.getLocation());
preparedInsertPayloadStatement.setString(9, payload.getProvenance());
preparedInsertPayloadStatement.executeUpdate();
String insertIntoPayloadFullQuery = insertIntoPayloadBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','" + payload.getActual_url() + "','"
+ payload.getTimestamp_acquired() + "','" + payload.getMime_type() + "','" + stringSize + "','" + payload.getHash() + "','"
+ payload.getLocation() + "','" + payload.getProvenance() + "')";
insertPayloadStatement.execute(insertIntoPayloadFullQuery);
} catch (SQLException sqle) {
logger.error("Problem when executing the \"insertIntoPayloadBaseQuery\": " + sqle.getMessage() + "\n\n");
logger.error("Problem when executing the \"insertIntoPayloadFullQuery\": " + sqle.getMessage() + "\n\n");
}
Error error = urlReport.getError();
@ -373,14 +372,15 @@ public class UrlController {
error = new Error(null, null);
}
try { // We use a "PreparedStatement" to do insertions, for security reasons.
preparedInsertAttemptStatement.setString(1, payload.getId());
preparedInsertAttemptStatement.setString(2, payload.getOriginal_url());
preparedInsertAttemptStatement.setTimestamp(3, payload.getTimestamp_acquired());
preparedInsertAttemptStatement.setString(4, urlReport.getStatus().toString());
preparedInsertAttemptStatement.setString(5, String.valueOf(error.getType())); // This covers the case of "null".
preparedInsertAttemptStatement.setString(6, error.getMessage());
preparedInsertAttemptStatement.executeUpdate();
try {
String errorCause = error.getMessage();
if ( errorCause != null )
errorCause = StringUtils.replace(errorCause, "'", "\\'", -1); // Escape single quotes in the error-cause-message.
String insertIntoAttemptFullQuery = insertIntoAttemptBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','"
+ payload.getTimestamp_acquired() + "','" + urlReport.getStatus().toString() + "','" + error.getType() + "','" + errorCause + "')";
insertAttemptStatement.execute(insertIntoAttemptFullQuery);
} catch (SQLException sqle) {
logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n");
}
@ -394,7 +394,7 @@ public class UrlController {
logger.error(errorMsg + "\n" + sqle.getMessage());
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg);
} finally {
closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here!
closeStatements(insertPayloadStatement, insertAttemptStatement, null); // Do not close the connection here!
}
logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables.");
@ -470,17 +470,17 @@ public class UrlController {
}
private boolean closePreparedStatements(PreparedStatement preparedStatement1, PreparedStatement preparedStatement2, Connection con) {
private boolean closeStatements(Statement statement1, Statement statement2, Connection con) {
try {
if ( preparedStatement1 != null )
preparedStatement1.close();
if ( preparedStatement2 != null )
preparedStatement2.close();
if ( statement1 != null )
statement1.close();
if ( statement2 != null )
statement2.close();
if ( con != null )
con.close(); // It may have already closed and that's fine.
return true;
} catch (SQLException sqle) {
logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage());
logger.error("Could not close the statements or the connection with the Impala-database.\n" + sqle.getMessage());
return false;
}
}

4
src/main/java/eu/openaire/urls_controller/util/FileUtils.java

@ -168,9 +168,9 @@ public class FileUtils {
try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) {
if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data..
fileLocation = resultSet.getString(1);
if ( fileLocation != null ) { // If the full-text of this record is already-found.
if ( fileLocation != null ) { // If the full-text of this record is already-found and uploaded.
payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3.
logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\".");
//logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); // DEBUG!
numFilesFoundFromPreviousAssignmentsBatches ++;
continue;
}

Loading…
Cancel
Save