- Optimize file-related tasks.

- Update dependencies.
- Code cleanup.
2022-05-26 15:43:59 +03:00 · 2022-05-26 15:43:59 +03:00 · e3b374a32f
parent 9096137008
commit e3b374a32f
5 changed files with 10 additions and 21 deletions
--- a/build.gradle
+++ b/build.gradle
@@ -1,5 +1,5 @@
 plugins {
-    id 'org.springframework.boot' version '2.6.6'
+    id 'org.springframework.boot' version '2.7.0'
    id 'io.spring.dependency-management' version '1.0.11.RELEASE'
    id 'java'
 }
@@ -37,7 +37,7 @@ dependencies {
    // Enable the validation annotations.
    //implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'

-    implementation "org.projectlombok:lombok:1.18.22"
+    implementation "org.projectlombok:lombok:1.18.24"

    // https://mvnrepository.com/artifact/com.google.guava/guava
    implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre'
@@ -45,11 +45,7 @@ dependencies {
    // https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
    implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'

-    implementation 'io.minio:minio:8.3.8'
-
-    // https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
-    implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
-    // This is required by the minio, as Spring < v.2.7.x , uses a version which is not supported by minio.
+    implementation 'io.minio:minio:8.4.1'

    // https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
    implementation("com.cloudera.impala:jdbc:2.5.31") {
--- a/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java
+++ b/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java
@@ -53,7 +53,7 @@ public class ImpalaConnector {
    private void createDatabase()
    {
        if ( isTestEnvironment ) {
-            logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
+            logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from initial-database \"" + initialDatabaseName + "\".");
            jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);

            jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
--- a/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java
+++ b/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java
@@ -5,7 +5,6 @@ import org.slf4j.LoggerFactory;
 import org.springframework.stereotype.Component;

 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -19,7 +18,7 @@ public class FileUnZipper {
    private static final Logger logger = LoggerFactory.getLogger(FileUnZipper.class);

    public void unzipFolder(Path source, Path target) throws Exception {
-        try ( ZipInputStream zis = new ZipInputStream(new FileInputStream(source.toFile())) ) {
+        try ( ZipInputStream zis = new ZipInputStream(Files.newInputStream(source.toFile().toPath())) ) {
            // Iterate over the files in zip and un-zip them.
            ZipEntry zipEntry = zis.getNextEntry();
            while ( zipEntry != null ) {
--- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java
@@ -253,7 +253,7 @@ public class FileUtils {

                    // Let's try to upload the file to S3 and update the payloads, either in successful file-uploads (right-away) or not (in the end).
                    try {
-                        // Prepare the filename as: "datasourceid/publicationid(123)::hash.pdf"
+                        // Prepare the filename as: "datasourceid/publicationid::hash.pdf"
                        // All related payloads point to this exact same file, BUT, may be related with different urlIDs, which in turn be related with different datasourceIDs.
                        // This file could have been found from different urlIds and thus be related to multiple datasourceIds.
                        // BUT, since the filename contains a specific urlID, the datasourceId should be the one related to that specific urlID.
@@ -419,18 +419,15 @@ public class FileUtils {
    }


-    private final int bufferSize = 20971520; // 20 MB
-
    public boolean saveZipFile(HttpURLConnection conn, File zipFile) {
        InputStream inStream = null;
        FileOutputStream outStream = null;
        try {
            inStream = conn.getInputStream();
            outStream = new FileOutputStream(zipFile);
-            byte[] byteBuffer = new byte[bufferSize]; // 20 MB
-            int bytesRead = -1;
-            while ( (bytesRead = inStream.read(byteBuffer, 0, bufferSize)) != -1 ) {
-                outStream.write(byteBuffer, 0, bytesRead);
+            int readByte;
+            while ( (readByte = inStream.read()) != -1 ) {
+                outStream.write(readByte);
            }
            return true;
        } catch (Exception e) {
--- a/src/main/java/eu/openaire/urls_controller/util/TestFileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/TestFileUtils.java
@@ -22,7 +22,7 @@ public class TestFileUtils {
    public Resource testResource = new ClassPathResource("testInputFiles/orderedList1000.json");

    public ThreadLocal<Integer> duplicateIdUrlEntries;
-    public ThreadLocal<Scanner> inputScanner;
+    public ThreadLocal<Scanner> inputScanner;   // On each request, a new thread is spawned to handle it. So, a new inputScanner is needed.

    private final int jsonBatchSize = 3000;
    private ThreadLocal<Integer> fileIndex;
@@ -33,9 +33,6 @@ public class TestFileUtils {

    public TestFileUtils() throws IOException {
        InputStream inputStream = testResource.getInputStream();
-        if ( inputStream == null )
-            throw new RuntimeException("No resourceFile was found with name \"" + testResource.getFilename() + "\"!");
-
        inputScanner = ThreadLocal.withInitial(() -> new Scanner(inputStream, utf8Charset));
        fileIndex = ThreadLocal.withInitial(() -> 0);
        unretrievableInputLines = ThreadLocal.withInitial(() -> 0);