forked from lsmyrnaios/UrlsController
- Remove the obsolete "parenthesis" and "increasing duplicate-num" from the full-texts' names, before sending them to the S3-Object-Store. They now end with the "file-hash", so it is guaranteed that they will be unique. The Worker continues to produce the previous kind of names, without any disturbance.
- Improve logging. - Update MinIO dependency.
This commit is contained in:
parent
a81ed3c60f
commit
9b95eebb6c
|
@ -45,7 +45,7 @@ dependencies {
|
||||||
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
||||||
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
||||||
|
|
||||||
implementation 'io.minio:minio:8.3.7'
|
implementation 'io.minio:minio:8.3.8'
|
||||||
|
|
||||||
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
||||||
implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
|
implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
|
||||||
|
|
|
@ -53,7 +53,7 @@ public class ImpalaConnector {
|
||||||
private void createDatabase()
|
private void createDatabase()
|
||||||
{
|
{
|
||||||
if ( isTestEnvironment ) {
|
if ( isTestEnvironment ) {
|
||||||
logger.info("Going to create (if not exist) the database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
|
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
|
||||||
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
||||||
|
|
||||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
||||||
|
@ -72,8 +72,10 @@ public class ImpalaConnector {
|
||||||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".assignment");
|
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".assignment");
|
||||||
|
|
||||||
databaseName = testDatabaseName; // For the rest of the queries.
|
databaseName = testDatabaseName; // For the rest of the queries.
|
||||||
} else
|
} else {
|
||||||
|
logger.info("Going to create or validate the tables that are populated by the Controller, for the \"initialDatabase\" = \"" + initialDatabaseName + "\"");
|
||||||
databaseName = initialDatabaseName;
|
databaseName = initialDatabaseName;
|
||||||
|
}
|
||||||
|
|
||||||
// For both cases, go check and create the tables which will be populated by the Controller.
|
// For both cases, go check and create the tables which will be populated by the Controller.
|
||||||
|
|
||||||
|
|
|
@ -306,7 +306,9 @@ public class FileUtils {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
fileName = datasourceId + "/" + filenameWithoutExtension + "::" + hash + fileExtension;
|
// Use the "fileNameID" and not the "filenameWithoutExtension", as we want to avoid keeping the possible "parenthesis" with the increasing number (which the Worker adds when an ID-fileName is duplicated).
|
||||||
|
// Now we append the file-hash, so it is guaranteed that the filename will be unique.
|
||||||
|
fileName = datasourceId + "/" + fileNameID + "::" + hash + fileExtension;
|
||||||
|
|
||||||
String s3Url = s3ObjectStore.uploadToS3(fileName, fileFullPath);
|
String s3Url = s3ObjectStore.uploadToS3(fileName, fileFullPath);
|
||||||
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
|
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
|
||||||
|
|
|
@ -104,9 +104,9 @@ public class S3ObjectStore {
|
||||||
.object(fileObjKeyName).filename(fileFullPath)
|
.object(fileObjKeyName).filename(fileFullPath)
|
||||||
.contentType(contentType).build());
|
.contentType(contentType).build());
|
||||||
|
|
||||||
// TODO - What if the fileObjKeyName already exists? Right now it gets overwritten (unless we add versioning0, which is not currently supported by our S3ObjectStore).
|
// What if the fileObjKeyName already exists? (Default action from S3-Object-Store --> overwrite)
|
||||||
// Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later,
|
// Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later, duplicate fileNames may appear.
|
||||||
// duplicate fileNames may appear and cause file-overwriting from the part of S3ObjectStore.
|
// However, the Controller uses the file-hash (instead of the duplicate-number), so it is 99.99% guaranteed that no overwrites will ever occur.
|
||||||
|
|
||||||
String s3Url = s3Protocol + bucketName + "/" + fileObjKeyName; // Be aware: This url works only if the access to the bucket is public.
|
String s3Url = s3Protocol + bucketName + "/" + fileObjKeyName; // Be aware: This url works only if the access to the bucket is public.
|
||||||
//logger.debug("Uploaded file \"" + fileObjKeyName + "\". The s3Url is: " + s3Url);
|
//logger.debug("Uploaded file \"" + fileObjKeyName + "\". The s3Url is: " + s3Url);
|
||||||
|
|
Loading…
Reference in New Issue