From e3b374a32fd8efe4b5049fc0c50501b54eca15f3 Mon Sep 17 00:00:00 2001 From: LSmyrnaios Date: Thu, 26 May 2022 15:43:59 +0300 Subject: [PATCH] - Optimize file-related tasks. - Update dependencies. - Code cleanup. --- build.gradle | 10 +++------- .../configuration/ImpalaConnector.java | 2 +- .../openaire/urls_controller/util/FileUnZipper.java | 3 +-- .../eu/openaire/urls_controller/util/FileUtils.java | 11 ++++------- .../openaire/urls_controller/util/TestFileUtils.java | 5 +---- 5 files changed, 10 insertions(+), 21 deletions(-) diff --git a/build.gradle b/build.gradle index e11f3db..6f90eac 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ plugins { - id 'org.springframework.boot' version '2.6.6' + id 'org.springframework.boot' version '2.7.0' id 'io.spring.dependency-management' version '1.0.11.RELEASE' id 'java' } @@ -37,7 +37,7 @@ dependencies { // Enable the validation annotations. //implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final' - implementation "org.projectlombok:lombok:1.18.22" + implementation "org.projectlombok:lombok:1.18.24" // https://mvnrepository.com/artifact/com.google.guava/guava implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre' @@ -45,11 +45,7 @@ dependencies { // https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' - implementation 'io.minio:minio:8.3.8' - - // https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp - implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3' - // This is required by the minio, as Spring < v.2.7.x , uses a version which is not supported by minio. + implementation 'io.minio:minio:8.4.1' // https://mvnrepository.com/artifact/com.cloudera.impala/jdbc implementation("com.cloudera.impala:jdbc:2.5.31") { diff --git a/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java b/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java index be99e09..1f3e2f6 100644 --- a/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java +++ b/src/main/java/eu/openaire/urls_controller/configuration/ImpalaConnector.java @@ -53,7 +53,7 @@ public class ImpalaConnector { private void createDatabase() { if ( isTestEnvironment ) { - logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\"."); + logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from initial-database \"" + initialDatabaseName + "\"."); jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName); jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication"); diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java b/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java index e943d6a..3a459a2 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUnZipper.java @@ -5,7 +5,6 @@ import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -19,7 +18,7 @@ public class FileUnZipper { private static final Logger logger = LoggerFactory.getLogger(FileUnZipper.class); public void unzipFolder(Path source, Path target) throws Exception { - try ( ZipInputStream zis = new ZipInputStream(new FileInputStream(source.toFile())) ) { + try ( ZipInputStream zis = new ZipInputStream(Files.newInputStream(source.toFile().toPath())) ) { // Iterate over the files in zip and un-zip them. ZipEntry zipEntry = zis.getNextEntry(); while ( zipEntry != null ) { diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index 152ab50..d3061a0 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -253,7 +253,7 @@ public class FileUtils { // Let's try to upload the file to S3 and update the payloads, either in successful file-uploads (right-away) or not (in the end). try { - // Prepare the filename as: "datasourceid/publicationid(123)::hash.pdf" + // Prepare the filename as: "datasourceid/publicationid::hash.pdf" // All related payloads point to this exact same file, BUT, may be related with different urlIDs, which in turn be related with different datasourceIDs. // This file could have been found from different urlIds and thus be related to multiple datasourceIds. // BUT, since the filename contains a specific urlID, the datasourceId should be the one related to that specific urlID. @@ -419,18 +419,15 @@ public class FileUtils { } - private final int bufferSize = 20971520; // 20 MB - public boolean saveZipFile(HttpURLConnection conn, File zipFile) { InputStream inStream = null; FileOutputStream outStream = null; try { inStream = conn.getInputStream(); outStream = new FileOutputStream(zipFile); - byte[] byteBuffer = new byte[bufferSize]; // 20 MB - int bytesRead = -1; - while ( (bytesRead = inStream.read(byteBuffer, 0, bufferSize)) != -1 ) { - outStream.write(byteBuffer, 0, bytesRead); + int readByte; + while ( (readByte = inStream.read()) != -1 ) { + outStream.write(readByte); } return true; } catch (Exception e) { diff --git a/src/main/java/eu/openaire/urls_controller/util/TestFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/TestFileUtils.java index 1386445..3e104b7 100644 --- a/src/main/java/eu/openaire/urls_controller/util/TestFileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/TestFileUtils.java @@ -22,7 +22,7 @@ public class TestFileUtils { public Resource testResource = new ClassPathResource("testInputFiles/orderedList1000.json"); public ThreadLocal duplicateIdUrlEntries; - public ThreadLocal inputScanner; + public ThreadLocal inputScanner; // On each request, a new thread is spawned to handle it. So, a new inputScanner is needed. private final int jsonBatchSize = 3000; private ThreadLocal fileIndex; @@ -33,9 +33,6 @@ public class TestFileUtils { public TestFileUtils() throws IOException { InputStream inputStream = testResource.getInputStream(); - if ( inputStream == null ) - throw new RuntimeException("No resourceFile was found with name \"" + testResource.getFilename() + "\"!"); - inputScanner = ThreadLocal.withInitial(() -> new Scanner(inputStream, utf8Charset)); fileIndex = ThreadLocal.withInitial(() -> 0); unretrievableInputLines = ThreadLocal.withInitial(() -> 0);