diff --git a/build.gradle b/build.gradle index bc174c4..e0623d0 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ plugins { - id 'org.springframework.boot' version '2.6.1' + id 'org.springframework.boot' version '2.6.2' id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'java' } @@ -43,6 +43,9 @@ dependencies { // https://mvnrepository.com/artifact/com.google.guava/guava implementation group: 'com.google.guava', name: 'guava', version: '31.0.1-jre' + // https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 + implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' + implementation 'io.minio:minio:8.3.4' // https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index d2880ba..2e6e589 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/installAndRun.sh b/installAndRun.sh index bfcf427..774ce96 100755 --- a/installAndRun.sh +++ b/installAndRun.sh @@ -8,7 +8,7 @@ elif [[ $# -gt 1 ]]; then echo -e "Wrong number of arguments given: ${#}\nPlease execute it like: script.sh "; exit 1 fi -gradleVersion="7.3.2" +gradleVersion="7.3.3" if [[ justInstall -eq 0 ]]; then diff --git a/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java b/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java index 6d1d2d7..a281469 100644 --- a/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java +++ b/src/main/java/eu/openaire/urls_controller/controllers/UrlController.java @@ -9,6 +9,7 @@ import eu.openaire.urls_controller.payloads.responces.AssignmentsResponse; import eu.openaire.urls_controller.util.ControllerConstants; import eu.openaire.urls_controller.util.FileUtils; import eu.openaire.urls_controller.util.GenericUtils; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.HttpStatus; @@ -306,21 +307,21 @@ public class UrlController { ImpalaConnector.databaseLock.lock(); // Store the workerReport into the database. - String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, date, mimetype, size, hash, location, provenance) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"; - String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, date, status, error_class, error_message) VALUES (?, ?, ?, ?, ?, ?)"; + String insertIntoPayloadBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".payload (id, original_url, actual_url, `date`, mimetype, size, `hash`, `location`, provenance) VALUES "; + String insertIntoAttemptBaseQuery = "INSERT INTO " + ImpalaConnector.databaseName + ".attempt (id, original_url, `date`, status, error_class, error_message) VALUES "; String tempInsertQueryName = null; - PreparedStatement preparedInsertPayloadStatement = null, preparedInsertAttemptStatement = null; + Statement insertPayloadStatement = null, insertAttemptStatement = null; try { tempInsertQueryName = "insertIntoPayloadBaseQuery"; - preparedInsertPayloadStatement = con.prepareStatement(insertIntoPayloadBaseQuery); + insertPayloadStatement = con.createStatement(); tempInsertQueryName = "insertIntoAttemptBaseQuery"; - preparedInsertAttemptStatement = con.prepareStatement(insertIntoAttemptBaseQuery); + insertAttemptStatement = con.createStatement(); } catch (SQLException sqle) { ImpalaConnector.databaseLock.unlock(); - String errorMsg = "Problem when creating the prepared statement for \"" + tempInsertQueryName + "\"!\n"; + String errorMsg = "Problem when creating the statement for \"" + tempInsertQueryName + "\"!\n"; logger.error(errorMsg + sqle.getMessage()); - closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con); + closeStatements(insertPayloadStatement, insertAttemptStatement, con); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); } @@ -330,13 +331,17 @@ public class UrlController { ImpalaConnector.databaseLock.unlock(); String errorMsg = "Problem when setting Connection.AutoCommit to \"false\"!\n"; logger.error(errorMsg + sqle.getMessage()); - closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, con); + closeStatements(insertPayloadStatement, insertAttemptStatement, con); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); } String payloadErrorMsg = null; int failedCount = 0; + // TODO - Think about handling this loop with multiple threads.. + // The Impala-server will handle the synchronization itself.. + // Check online what happens with "statement.setPoolable()" does it improves speed? in multi or also in single thread? + for ( UrlReport urlReport : urlReports ) { Payload payload = urlReport.getPayload(); if ( payload == null ) { @@ -345,26 +350,20 @@ public class UrlController { continue; } - try { // We use a "PreparedStatement" to do insertions, for security reasons. - preparedInsertPayloadStatement.setString(1, payload.getId()); - preparedInsertPayloadStatement.setString(2, payload.getOriginal_url()); - preparedInsertPayloadStatement.setString(3, payload.getActual_url()); - preparedInsertPayloadStatement.setTimestamp(4, payload.getTimestamp_acquired()); - preparedInsertPayloadStatement.setString(5, payload.getMime_type()); - + try { // The column "size" in the table is of type "String" so we cast the Long to String. The Parquet-format in the database does not work well with integers. String stringSize = null; Long size = payload.getSize(); if ( size != null ) stringSize = String.valueOf(size); - preparedInsertPayloadStatement.setString(6, stringSize); - preparedInsertPayloadStatement.setString(7, payload.getHash()); - preparedInsertPayloadStatement.setString(8, payload.getLocation()); - preparedInsertPayloadStatement.setString(9, payload.getProvenance()); - preparedInsertPayloadStatement.executeUpdate(); + String insertIntoPayloadFullQuery = insertIntoPayloadBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','" + payload.getActual_url() + "','" + + payload.getTimestamp_acquired() + "','" + payload.getMime_type() + "','" + stringSize + "','" + payload.getHash() + "','" + + payload.getLocation() + "','" + payload.getProvenance() + "')"; + + insertPayloadStatement.execute(insertIntoPayloadFullQuery); } catch (SQLException sqle) { - logger.error("Problem when executing the \"insertIntoPayloadBaseQuery\": " + sqle.getMessage() + "\n\n"); + logger.error("Problem when executing the \"insertIntoPayloadFullQuery\": " + sqle.getMessage() + "\n\n"); } Error error = urlReport.getError(); @@ -373,14 +372,15 @@ public class UrlController { error = new Error(null, null); } - try { // We use a "PreparedStatement" to do insertions, for security reasons. - preparedInsertAttemptStatement.setString(1, payload.getId()); - preparedInsertAttemptStatement.setString(2, payload.getOriginal_url()); - preparedInsertAttemptStatement.setTimestamp(3, payload.getTimestamp_acquired()); - preparedInsertAttemptStatement.setString(4, urlReport.getStatus().toString()); - preparedInsertAttemptStatement.setString(5, String.valueOf(error.getType())); // This covers the case of "null". - preparedInsertAttemptStatement.setString(6, error.getMessage()); - preparedInsertAttemptStatement.executeUpdate(); + try { + String errorCause = error.getMessage(); + if ( errorCause != null ) + errorCause = StringUtils.replace(errorCause, "'", "\\'", -1); // Escape single quotes in the error-cause-message. + + String insertIntoAttemptFullQuery = insertIntoAttemptBaseQuery + "('" + payload.getId() + "','" + payload.getOriginal_url() + "','" + + payload.getTimestamp_acquired() + "','" + urlReport.getStatus().toString() + "','" + error.getType() + "','" + errorCause + "')"; + + insertAttemptStatement.execute(insertIntoAttemptFullQuery); } catch (SQLException sqle) { logger.error("Problem when executing the \"insertIntoAttemptBaseQuery\": " + sqle.getMessage() + "\n\n"); } @@ -394,7 +394,7 @@ public class UrlController { logger.error(errorMsg + "\n" + sqle.getMessage()); return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMsg); } finally { - closePreparedStatements(preparedInsertPayloadStatement, preparedInsertAttemptStatement, null); // Do not close the connection here! + closeStatements(insertPayloadStatement, insertAttemptStatement, null); // Do not close the connection here! } logger.debug("Finished inserting the payloads and the attempts into the \"payload\" and \"attempt\" tables. Going to merge the parquet files for those tables."); @@ -470,17 +470,17 @@ public class UrlController { } - private boolean closePreparedStatements(PreparedStatement preparedStatement1, PreparedStatement preparedStatement2, Connection con) { + private boolean closeStatements(Statement statement1, Statement statement2, Connection con) { try { - if ( preparedStatement1 != null ) - preparedStatement1.close(); - if ( preparedStatement2 != null ) - preparedStatement2.close(); + if ( statement1 != null ) + statement1.close(); + if ( statement2 != null ) + statement2.close(); if ( con != null ) con.close(); // It may have already closed and that's fine. return true; } catch (SQLException sqle) { - logger.error("Could not close the connection with the Impala-database.\n" + sqle.getMessage()); + logger.error("Could not close the statements or the connection with the Impala-database.\n" + sqle.getMessage()); return false; } } diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index 4ee7d49..3bb311c 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -168,9 +168,9 @@ public class FileUtils { try ( ResultSet resultSet = getFileLocationForHashPreparedStatement.executeQuery() ) { if ( resultSet.next() ) { // Move the "cursor" to the first row. If there is any data.. fileLocation = resultSet.getString(1); - if ( fileLocation != null ) { // If the full-text of this record is already-found. + if ( fileLocation != null ) { // If the full-text of this record is already-found and uploaded. payload.setLocation(fileLocation); // Set the location to the older identical file, which was uploaded to S3. - logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); + //logger.debug("The record with ID \"" + payload.getId() + "\" has an \"alreadyRetrieved\" file, with hash \"" + fileHash + "\" and location \"" + fileLocation + "\"."); // DEBUG! numFilesFoundFromPreviousAssignmentsBatches ++; continue; }