- Optimize file-related tasks.
- Update dependencies.
- Code cleanup.
This commit is contained in:
parent
9096137008
commit
e3b374a32f
10
build.gradle
10
build.gradle
|
@ -1,5 +1,5 @@
|
||||||
plugins {
|
plugins {
|
||||||
id 'org.springframework.boot' version '2.6.6'
|
id 'org.springframework.boot' version '2.7.0'
|
||||||
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
|
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
|
||||||
id 'java'
|
id 'java'
|
||||||
}
|
}
|
||||||
|
@ -37,7 +37,7 @@ dependencies {
|
||||||
// Enable the validation annotations.
|
// Enable the validation annotations.
|
||||||
//implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
|
//implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
|
||||||
|
|
||||||
implementation "org.projectlombok:lombok:1.18.22"
|
implementation "org.projectlombok:lombok:1.18.24"
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.google.guava/guava
|
// https://mvnrepository.com/artifact/com.google.guava/guava
|
||||||
implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre'
|
implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre'
|
||||||
|
@ -45,11 +45,7 @@ dependencies {
|
||||||
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
||||||
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
||||||
|
|
||||||
implementation 'io.minio:minio:8.3.8'
|
implementation 'io.minio:minio:8.4.1'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
|
||||||
implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
|
|
||||||
// This is required by minio, as Spring < v2.7.x uses an okhttp version which is not supported by minio.
|
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
|
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
|
||||||
implementation("com.cloudera.impala:jdbc:2.5.31") {
|
implementation("com.cloudera.impala:jdbc:2.5.31") {
|
||||||
|
|
|
@ -53,7 +53,7 @@ public class ImpalaConnector {
|
||||||
private void createDatabase()
|
private void createDatabase()
|
||||||
{
|
{
|
||||||
if ( isTestEnvironment ) {
|
if ( isTestEnvironment ) {
|
||||||
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
|
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from initial-database \"" + initialDatabaseName + "\".");
|
||||||
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
||||||
|
|
|
@ -5,7 +5,6 @@ import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -19,7 +18,7 @@ public class FileUnZipper {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(FileUnZipper.class);
|
private static final Logger logger = LoggerFactory.getLogger(FileUnZipper.class);
|
||||||
|
|
||||||
public void unzipFolder(Path source, Path target) throws Exception {
|
public void unzipFolder(Path source, Path target) throws Exception {
|
||||||
try ( ZipInputStream zis = new ZipInputStream(new FileInputStream(source.toFile())) ) {
|
try ( ZipInputStream zis = new ZipInputStream(Files.newInputStream(source.toFile().toPath())) ) {
|
||||||
// Iterate over the files in zip and un-zip them.
|
// Iterate over the files in zip and un-zip them.
|
||||||
ZipEntry zipEntry = zis.getNextEntry();
|
ZipEntry zipEntry = zis.getNextEntry();
|
||||||
while ( zipEntry != null ) {
|
while ( zipEntry != null ) {
|
||||||
|
|
|
@ -253,7 +253,7 @@ public class FileUtils {
|
||||||
|
|
||||||
// Let's try to upload the file to S3 and update the payloads, either in successful file-uploads (right-away) or not (in the end).
|
// Let's try to upload the file to S3 and update the payloads, either in successful file-uploads (right-away) or not (in the end).
|
||||||
try {
|
try {
|
||||||
// Prepare the filename as: "datasourceid/publicationid(123)::hash.pdf"
|
// Prepare the filename as: "datasourceid/publicationid::hash.pdf"
|
||||||
// All related payloads point to this exact same file, BUT, may be related to different urlIDs, which in turn may be related to different datasourceIDs.
|
// All related payloads point to this exact same file, BUT, may be related to different urlIDs, which in turn may be related to different datasourceIDs.
|
||||||
// This file could have been found from different urlIds and thus be related to multiple datasourceIds.
|
// This file could have been found from different urlIds and thus be related to multiple datasourceIds.
|
||||||
// BUT, since the filename contains a specific urlID, the datasourceId should be the one related to that specific urlID.
|
// BUT, since the filename contains a specific urlID, the datasourceId should be the one related to that specific urlID.
|
||||||
|
@ -419,18 +419,15 @@ public class FileUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private final int bufferSize = 20971520; // 20 MB
|
|
||||||
|
|
||||||
public boolean saveZipFile(HttpURLConnection conn, File zipFile) {
|
public boolean saveZipFile(HttpURLConnection conn, File zipFile) {
|
||||||
InputStream inStream = null;
|
InputStream inStream = null;
|
||||||
FileOutputStream outStream = null;
|
FileOutputStream outStream = null;
|
||||||
try {
|
try {
|
||||||
inStream = conn.getInputStream();
|
inStream = conn.getInputStream();
|
||||||
outStream = new FileOutputStream(zipFile);
|
outStream = new FileOutputStream(zipFile);
|
||||||
byte[] byteBuffer = new byte[bufferSize]; // 20 MB
|
int readByte;
|
||||||
int bytesRead = -1;
|
while ( (readByte = inStream.read()) != -1 ) {
|
||||||
while ( (bytesRead = inStream.read(byteBuffer, 0, bufferSize)) != -1 ) {
|
outStream.write(readByte);
|
||||||
outStream.write(byteBuffer, 0, bytesRead);
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -22,7 +22,7 @@ public class TestFileUtils {
|
||||||
public Resource testResource = new ClassPathResource("testInputFiles/orderedList1000.json");
|
public Resource testResource = new ClassPathResource("testInputFiles/orderedList1000.json");
|
||||||
|
|
||||||
public ThreadLocal<Integer> duplicateIdUrlEntries;
|
public ThreadLocal<Integer> duplicateIdUrlEntries;
|
||||||
public ThreadLocal<Scanner> inputScanner;
|
public ThreadLocal<Scanner> inputScanner; // On each request, a new thread is spawned to handle it. So, a new inputScanner is needed.
|
||||||
|
|
||||||
private final int jsonBatchSize = 3000;
|
private final int jsonBatchSize = 3000;
|
||||||
private ThreadLocal<Integer> fileIndex;
|
private ThreadLocal<Integer> fileIndex;
|
||||||
|
@ -33,9 +33,6 @@ public class TestFileUtils {
|
||||||
|
|
||||||
public TestFileUtils() throws IOException {
|
public TestFileUtils() throws IOException {
|
||||||
InputStream inputStream = testResource.getInputStream();
|
InputStream inputStream = testResource.getInputStream();
|
||||||
if ( inputStream == null )
|
|
||||||
throw new RuntimeException("No resourceFile was found with name \"" + testResource.getFilename() + "\"!");
|
|
||||||
|
|
||||||
inputScanner = ThreadLocal.withInitial(() -> new Scanner(inputStream, utf8Charset));
|
inputScanner = ThreadLocal.withInitial(() -> new Scanner(inputStream, utf8Charset));
|
||||||
fileIndex = ThreadLocal.withInitial(() -> 0);
|
fileIndex = ThreadLocal.withInitial(() -> 0);
|
||||||
unretrievableInputLines = ThreadLocal.withInitial(() -> 0);
|
unretrievableInputLines = ThreadLocal.withInitial(() -> 0);
|
||||||
|
|
Loading…
Reference in New Issue