forked from lsmyrnaios/UrlsController
- Optimize file-related tasks.
- Update dependencies.
- Code cleanup.
This commit is contained in:
parent
9096137008
commit
e3b374a32f
10
build.gradle
10
build.gradle
|
@ -1,5 +1,5 @@
|
|||
plugins {
|
||||
id 'org.springframework.boot' version '2.6.6'
|
||||
id 'org.springframework.boot' version '2.7.0'
|
||||
id 'io.spring.dependency-management' version '1.0.11.RELEASE'
|
||||
id 'java'
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ dependencies {
|
|||
// Enable the validation annotations.
|
||||
//implementation group: 'javax.validation', name: 'validation-api', version: '2.0.1.Final'
|
||||
|
||||
implementation "org.projectlombok:lombok:1.18.22"
|
||||
implementation "org.projectlombok:lombok:1.18.24"
|
||||
|
||||
// https://mvnrepository.com/artifact/com.google.guava/guava
|
||||
implementation group: 'com.google.guava', name: 'guava', version: '31.1-jre'
|
||||
|
@ -45,11 +45,7 @@ dependencies {
|
|||
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
||||
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
||||
|
||||
implementation 'io.minio:minio:8.3.8'
|
||||
|
||||
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
||||
implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
|
||||
// This is required by the minio, as Spring < v.2.7.x , uses a version which is not supported by minio.
|
||||
implementation 'io.minio:minio:8.4.1'
|
||||
|
||||
// https://mvnrepository.com/artifact/com.cloudera.impala/jdbc
|
||||
implementation("com.cloudera.impala:jdbc:2.5.31") {
|
||||
|
|
|
@ -53,7 +53,7 @@ public class ImpalaConnector {
|
|||
private void createDatabase()
|
||||
{
|
||||
if ( isTestEnvironment ) {
|
||||
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
|
||||
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from initial-database \"" + initialDatabaseName + "\".");
|
||||
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
||||
|
|
|
@ -5,7 +5,6 @@ import org.slf4j.LoggerFactory;
|
|||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
@ -19,7 +18,7 @@ public class FileUnZipper {
|
|||
private static final Logger logger = LoggerFactory.getLogger(FileUnZipper.class);
|
||||
|
||||
public void unzipFolder(Path source, Path target) throws Exception {
|
||||
try ( ZipInputStream zis = new ZipInputStream(new FileInputStream(source.toFile())) ) {
|
||||
try ( ZipInputStream zis = new ZipInputStream(Files.newInputStream(source.toFile().toPath())) ) {
|
||||
// Iterate over the files in zip and un-zip them.
|
||||
ZipEntry zipEntry = zis.getNextEntry();
|
||||
while ( zipEntry != null ) {
|
||||
|
|
|
@ -253,7 +253,7 @@ public class FileUtils {
|
|||
|
||||
// Let's try to upload the file to S3 and update the payloads, either in successful file-uploads (right-away) or not (in the end).
|
||||
try {
|
||||
// Prepare the filename as: "datasourceid/publicationid(123)::hash.pdf"
|
||||
// Prepare the filename as: "datasourceid/publicationid::hash.pdf"
|
||||
// All related payloads point to this exact same file, BUT, may be related with different urlIDs, which in turn be related with different datasourceIDs.
|
||||
// This file could have been found from different urlIds and thus be related to multiple datasourceIds.
|
||||
// BUT, since the filename contains a specific urlID, the datasourceId should be the one related to that specific urlID.
|
||||
|
@ -419,18 +419,15 @@ public class FileUtils {
|
|||
}
|
||||
|
||||
|
||||
private final int bufferSize = 20971520; // 20 MB
|
||||
|
||||
public boolean saveZipFile(HttpURLConnection conn, File zipFile) {
|
||||
InputStream inStream = null;
|
||||
FileOutputStream outStream = null;
|
||||
try {
|
||||
inStream = conn.getInputStream();
|
||||
outStream = new FileOutputStream(zipFile);
|
||||
byte[] byteBuffer = new byte[bufferSize]; // 20 MB
|
||||
int bytesRead = -1;
|
||||
while ( (bytesRead = inStream.read(byteBuffer, 0, bufferSize)) != -1 ) {
|
||||
outStream.write(byteBuffer, 0, bytesRead);
|
||||
int readByte;
|
||||
while ( (readByte = inStream.read()) != -1 ) {
|
||||
outStream.write(readByte);
|
||||
}
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -22,7 +22,7 @@ public class TestFileUtils {
|
|||
public Resource testResource = new ClassPathResource("testInputFiles/orderedList1000.json");
|
||||
|
||||
public ThreadLocal<Integer> duplicateIdUrlEntries;
|
||||
public ThreadLocal<Scanner> inputScanner;
|
||||
public ThreadLocal<Scanner> inputScanner; // On each request, a new thread is spawned to handle it. So, a new inputScanner is needed.
|
||||
|
||||
private final int jsonBatchSize = 3000;
|
||||
private ThreadLocal<Integer> fileIndex;
|
||||
|
@ -33,9 +33,6 @@ public class TestFileUtils {
|
|||
|
||||
public TestFileUtils() throws IOException {
|
||||
InputStream inputStream = testResource.getInputStream();
|
||||
if ( inputStream == null )
|
||||
throw new RuntimeException("No resourceFile was found with name \"" + testResource.getFilename() + "\"!");
|
||||
|
||||
inputScanner = ThreadLocal.withInitial(() -> new Scanner(inputStream, utf8Charset));
|
||||
fileIndex = ThreadLocal.withInitial(() -> 0);
|
||||
unretrievableInputLines = ThreadLocal.withInitial(() -> 0);
|
||||
|
|
Loading…
Reference in New Issue