forked from lsmyrnaios/UrlsController
- Remove the obsolete "parenthesis" and "increasing duplicate-num" from the full-texts' names, before sending them to the S3-Object-Store. They now end with the "file-hash", so it is guaranteed that they will be unique. The Worker continues to produce the previous kind of names, without any disturbance.
- Improve logging. - Update MinIO dependency.
This commit is contained in:
parent
a81ed3c60f
commit
9b95eebb6c
|
@ -45,7 +45,7 @@ dependencies {
|
|||
// https://mvnrepository.com/artifact/org.apache.commons/commons-lang3
|
||||
implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
|
||||
|
||||
implementation 'io.minio:minio:8.3.7'
|
||||
implementation 'io.minio:minio:8.3.8'
|
||||
|
||||
// https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp
|
||||
implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.3'
|
||||
|
|
|
@ -53,7 +53,7 @@ public class ImpalaConnector {
|
|||
private void createDatabase()
|
||||
{
|
||||
if ( isTestEnvironment ) {
|
||||
logger.info("Going to create (if not exist) the database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
|
||||
logger.info("Going to create (if not exist) the test-database \"" + testDatabaseName + "\" and its tables. Also will fill some tables with data from database \"" + initialDatabaseName + "\".");
|
||||
jdbcTemplate.execute("CREATE DATABASE IF NOT EXISTS " + testDatabaseName);
|
||||
|
||||
jdbcTemplate.execute("CREATE TABLE IF NOT EXISTS " + testDatabaseName + ".publication stored as parquet as select * from " + initialDatabaseName + ".publication");
|
||||
|
@ -72,8 +72,10 @@ public class ImpalaConnector {
|
|||
jdbcTemplate.execute("COMPUTE STATS " + testDatabaseName + ".assignment");
|
||||
|
||||
databaseName = testDatabaseName; // For the rest of the queries.
|
||||
} else
|
||||
} else {
|
||||
logger.info("Going to create or validate the tables that are populated by the Controller, for the \"initialDatabase\" = \"" + initialDatabaseName + "\"");
|
||||
databaseName = initialDatabaseName;
|
||||
}
|
||||
|
||||
// For both cases, go check and create the tables which will be populated by the Controller.
|
||||
|
||||
|
|
|
@ -306,7 +306,9 @@ public class FileUtils {
|
|||
continue;
|
||||
}
|
||||
|
||||
fileName = datasourceId + "/" + filenameWithoutExtension + "::" + hash + fileExtension;
|
||||
// Use the "fileNameID" and not the "filenameWithoutExtension", as we want to avoid keeping the possible parentheses with the increasing number (which the Worker adds when an ID-fileName is duplicated).
|
||||
// Now we append the file-hash, so it is guaranteed that the filename will be unique.
|
||||
fileName = datasourceId + "/" + fileNameID + "::" + hash + fileExtension;
|
||||
|
||||
String s3Url = s3ObjectStore.uploadToS3(fileName, fileFullPath);
|
||||
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
|
||||
|
|
|
@ -104,9 +104,9 @@ public class S3ObjectStore {
|
|||
.object(fileObjKeyName).filename(fileFullPath)
|
||||
.contentType(contentType).build());
|
||||
|
||||
// TODO - What if the fileObjKeyName already exists? Right now it gets overwritten (unless we add versioning, which is not currently supported by our S3ObjectStore).
|
||||
// Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later,
|
||||
// duplicate fileNames may appear and cause file-overwriting from the part of S3ObjectStore.
|
||||
// What if the fileObjKeyName already exists? (Default action from S3-Object-Store --> overwrite)
|
||||
// Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later, duplicate fileNames may appear.
|
||||
// However, the Controller uses the file-hash (instead of the duplicate-number), so it is 99.99% guaranteed that no overwrites will ever occur.
|
||||
|
||||
String s3Url = s3Protocol + bucketName + "/" + fileObjKeyName; // Be aware: This url works only if the access to the bucket is public.
|
||||
//logger.debug("Uploaded file \"" + fileObjKeyName + "\". The s3Url is: " + s3Url);
|
||||
|
|
Loading…
Reference in New Issue