- Use the provided contentType per bulkImport-provenance or per workerAggregated-file, when available, instead of examining the file-extension of each file.

- Set next version.
This commit is contained in:
Lampros Smyrnaios 2024-11-09 14:18:03 +02:00
parent f8050c4165
commit 7f8eebb564
4 changed files with 42 additions and 31 deletions

View File

@ -11,7 +11,7 @@ ext {
java {
group = "${app_group}"
version = '2.8.3'
version = '2.8.4-SNAPSHOT'
sourceCompatibility = JavaVersion.VERSION_1_8
}

View File

@ -477,7 +477,7 @@ public class BulkImportServiceImpl implements BulkImportService {
if ( fileLocationData == null ) // In case we have a provenance with "idMappings", this will not have been set.
if ( (fileLocationData = getFileLocationData(fileLocation, additionalLoggingMsg)) == null )
return null;
s3Url = fileUtils.constructS3FilenameAndUploadToS3(fileLocationData.getFileDir(), fileLocationData.getFileName(), openAireId, fileLocationData.getDotFileExtension(), bulkImportSource.getDatasourceID(), fileHash);
s3Url = fileUtils.constructS3FilenameAndUploadToS3(fileLocationData.getFileDir(), fileLocationData.getFileName(), openAireId, fileLocationData.getDotFileExtension(), bulkImportSource.getDatasourceID(), fileHash, bulkImportSource.getMimeType());
if ( s3Url == null )
return null;
}

View File

@ -202,11 +202,13 @@ public class FileUtils {
String datasourceId = null;
String hash = null;
String mimeType = null;
boolean isFound = false;
for ( Payload payload : fileRelatedPayloads ) {
if ( fileNameID.equals(payload.getId()) ) {
datasourceId = payload.getDatasourceId();
hash = payload.getHash();
mimeType = payload.getMime_type();
isFound = true;
break;
}
@ -218,7 +220,7 @@ public class FileUtils {
}
try {
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash);
String s3Url = constructS3FilenameAndUploadToS3(targetDirectory, fileName, fileNameID, dotFileExtension, datasourceId, hash, mimeType);
if ( s3Url != null ) {
setFullTextForMultiplePayloads(fileRelatedPayloads, s3Url);
//numUploadedFiles ++;
@ -235,7 +237,7 @@ public class FileUtils {
public String constructS3FilenameAndUploadToS3(String targetDirectory, String fileName, String openAireId,
String dotFileExtension, String datasourceId, String hash) throws ConnectException, UnknownHostException
String dotFileExtension, String datasourceId, String hash, String mimeType) throws ConnectException, UnknownHostException
{
String filenameForS3 = constructS3FileName(fileName, openAireId, dotFileExtension, datasourceId, hash); // This name is for the uploaded file, in the S3 Object Store.
if ( filenameForS3 == null ) // The error is logged inside.
@ -244,7 +246,7 @@ public class FileUtils {
String fileFullPath = targetDirectory + fileName; // The fullPath to the local file (which has the previous name).
String s3Url = null;
try {
s3Url = s3ObjectStore.uploadToS3(filenameForS3, fileFullPath);
s3Url = s3ObjectStore.uploadToS3(filenameForS3, fileFullPath, mimeType);
} catch (ConnectException ce) {
logger.error("Could not connect with the S3 Object Store! " + ce.getMessage());
throw ce;

View File

@ -73,36 +73,16 @@ public class S3ObjectStore {
}
private static final Pattern EXTENSION_PATTERN = Pattern.compile("(\\.[^.]+)$");
/**
* @param fileObjKeyName = "**File object key name**";
* @param fileFullPath = "**Path of the file to upload**";
* @param fileFullPath = "**Path of the file to upload**";
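* @param contentType = "**Content-type (mime-type) of the file; if it is null, the content-type is extracted from the file-extension**";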
* @return the url of the uploaded file
*/
public String uploadToS3(String fileObjKeyName, String fileFullPath) throws Exception {
String contentType = null;
// Take the Matcher to retrieve the extension.
Matcher extensionMatcher = EXTENSION_PATTERN.matcher(fileFullPath);
if ( extensionMatcher.find() ) {
String extension = null;
if ( (extension = extensionMatcher.group(0)) == null )
contentType = "application/pdf";
else {
if ( extension.equals("pdf") )
contentType = "application/pdf";
else if ( extension.equals("xml") )
contentType = "application/xml";
/*else if ( *//* TODO - other-extension-match *//* )
contentType = "application/EXTENSION"; */
else
contentType = "application/pdf"; // Default.
}
} else {
logger.warn("The file with key \"" + fileObjKeyName + "\" does not have a file-extension! Setting the \"pdf\"-mimeType.");
contentType = "application/pdf";
}
public String uploadToS3(String fileObjKeyName, String fileFullPath, String contentType) throws Exception
{
if ( contentType == null )
contentType = extractContentTypeFromFileNameKey(fileObjKeyName, fileFullPath);
minioClient.uploadObject(UploadObjectArgs.builder()
.bucket(bucketName)
@ -189,4 +169,33 @@ public class S3ObjectStore {
return true;
}
private static final Pattern EXTENSION_PATTERN = Pattern.compile("(\\.[^.]+)$");
/**
 * Determines the content-type of a file from the file-extension found at the end of its full path.
 * Only the "xml" extension maps to "application/xml"; the "pdf" extension, any other extension
 * and a missing extension all map to "application/pdf", which is the default.
 *
 * @param fileNameKey the key-name of the file-object; it is used only in the warning-message.
 * @param fileFullPath the full path of the local file; the extension is taken from the end of this string.
 * @return the content-type to use for the file; never null.
 */
private String extractContentTypeFromFileNameKey(String fileNameKey, String fileFullPath)
{
    Matcher extensionMatcher = EXTENSION_PATTERN.matcher(fileFullPath);
    if ( !extensionMatcher.find() ) {
        logger.warn("The file with key \"" + fileNameKey + "\" does not have a file-extension! Setting the \"pdf\"-contentType.");
        return "application/pdf";
    }

    // The pattern matches the extension TOGETHER with its leading dot (e.g. ".xml"), so the dot has to be part of the comparison.
    // Comparing against "xml" (without the dot) could never succeed and every file would be given the "pdf" content-type.
    // After a successful "find()", the whole match is never null or empty, so no extra check is needed.
    String dotFileExtension = extensionMatcher.group(0);

    // The comparison ignores the case, so that extensions like ".XML" are recognized as well.
    if ( ".xml".equalsIgnoreCase(dotFileExtension) )
        return "application/xml";
    /* TODO - other-extension-match: return "application/EXTENSION"; */

    return "application/pdf";  // This covers the ".pdf" extension and it is also the default for any other extension.
}
}