- Optimize file-related tasks.

- Update dependencies.
- Code cleanup.
This commit is contained in:
Lampros Smyrnaios 2022-05-26 15:43:59 +03:00
parent 9096137008
commit e3b374a32f
5 changed files with 10 additions and 21 deletions

View File

@ -1,5 +1,5 @@
plugins {
id 'org.springframework.boot' version '2.6.6'
id 'org.springframework.boot' version '2.7.0'
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
id 'java'
}
@ -37,7 +37,7 @@ dependencies {
// Enable the validation annotations.
//implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
implementation "org.projectlombok:lombok:1.18.22"
implementation "org.projectlombok:lombok:1.18.24"
// https://mvnrepository.com/artifact/com.google.guava/guava
implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre'
@ -45,11 +45,7 @@ dependencies {
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
implementation 'io.minio:minio:8.3.8'
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
// This is required by minio, as Spring < v2.7.x uses an okhttp version which is not supported by minio.
implementation 'io.minio:minio:8.4.1'
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
implementation("com.cloudera.impala:jdbc:2.5.31") {

View File

@ -53,7 +53,7 @@ public class ImpalaConnector {
private void createDatabase()
{
if ( isTestEnvironment ) {
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from initial-database \"" + initialDatabaseName + "\".");
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");

View File

@ -5,7 +5,6 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
@ -19,7 +18,7 @@ public class FileUnZipper {
private static final Logger logger = LoggerFactory.getLogger(FileUnZipper.class);
public void unzipFolder(Path source, Path target) throws Exception {
try ( ZipInputStream zis = new ZipInputStream(new FileInputStream(source.toFile())) ) {
try ( ZipInputStream zis = new ZipInputStream(Files.newInputStream(source.toFile().toPath())) ) {
// Iterate over the files in zip and un-zip them.
ZipEntry zipEntry = zis.getNextEntry();
while ( zipEntry != null ) {

View File

@ -253,7 +253,7 @@ public class FileUtils {
// Let's try to upload the file to S3 and update the payloads, either in successful file-uploads (right-away) or not (in the end).
try {
// Prepare the filename as: "datasourceid/publicationid(123)::hash.pdf"
// Prepare the filename as: "datasourceid/publicationid::hash.pdf"
// All related payloads point to this exact same file, BUT they may be related to different urlIDs, which in turn may be related to different datasourceIDs.
// This file could have been found from different urlIds and thus be related to multiple datasourceIds.
// BUT, since the filename contains a specific urlID, the datasourceId should be the one related to that specific urlID.
@ -419,18 +419,15 @@ public class FileUtils {
}
private final int bufferSize = 20971520; // 20 MB
public boolean saveZipFile(HttpURLConnection conn, File zipFile) {
InputStream inStream = null;
FileOutputStream outStream = null;
try {
inStream = conn.getInputStream();
outStream = new FileOutputStream(zipFile);
byte[] byteBuffer = new byte[bufferSize]; // 20 MB
int bytesRead = -1;
while ( (bytesRead = inStream.read(byteBuffer, 0, bufferSize)) != -1 ) {
outStream.write(byteBuffer, 0, bytesRead);
int readByte;
while ( (readByte = inStream.read()) != -1 ) {
outStream.write(readByte);
}
return true;
} catch (Exception e) {

View File

@ -22,7 +22,7 @@ public class TestFileUtils {
public Resource testResource = new ClassPathResource("testInputFiles/orderedList1000.json");
public ThreadLocal<Integer> duplicateIdUrlEntries;
public ThreadLocal<Scanner> inputScanner;
public ThreadLocal<Scanner> inputScanner; // On each request, a new thread is spawned to handle it. So, a new inputScanner is needed.
private final int jsonBatchSize = 3000;
private ThreadLocal<Integer> fileIndex;
@ -33,9 +33,6 @@ public class TestFileUtils {
public TestFileUtils() throws IOException {
InputStream inputStream = testResource.getInputStream();
if ( inputStream == null )
throw new RuntimeException("No resourceFile was found with name \"" + testResource.getFilename() + "\"!");
inputScanner = ThreadLocal.withInitial(() -> new Scanner(inputStream, utf8Charset));
fileIndex = ThreadLocal.withInitial(() -> 0);
unretrievableInputLines = ThreadLocal.withInitial(() -> 0);