- Fix a bug where the "fileNameID" was used, instead of the "OpenAireID", in the S3 location of bulk-imported files. In aggregation, the "fileNameID" is the OpenAireID, but that is not the case in bulk-import.
- Update dependencies.
- Code polishing.
@@ -270,7 +270,7 @@ public class BulkImportServiceImpl implements BulkImportService {
failedFiles.add(fileLocation);
}
if(((++counter)%150)==0){// Every 150 files, report the status for this segment and right it to the file.
if(((++counter)%150)==0){// Every 150 files, report the status for this segment and write it to the file.
msg="Progress for segment-"+segmentCounter+" : "+payloadRecords.size()+" files have been imported and "+failedFiles.size()+" have failed, out of "+numOfFilesInSegment+" files.";
if(logger.isTraceEnabled())
logger.trace(msg+additionalLoggingMsg);
@@ -363,8 +363,7 @@ public class BulkImportServiceImpl implements BulkImportService {
// Check if this file is already found by crawling. Even though we started excluding this datasource from crawling, many full-texts have already been downloaded.
@@ -421,7 +420,7 @@ public class BulkImportServiceImpl implements BulkImportService {
// The "matcher.group(3)" returns the "filenameWithoutExtension", which is currently not used.
// Use the "fileNameID" and not the "filenameWithoutExtension", as we want to avoid keeping the possible "parenthesis" with the increasing number (about the duplication of ID-fileName).
StringfileNameID=matcher.group(4);
StringfileNameID=matcher.group(4);// The "fileNameID" is the OpenAIRE_ID for this file.
if((fileNameID==null)||fileNameID.isEmpty()){
logger.error("Failed to extract the \"fileNameID\" from \""+fileName+"\".");
StringfilenameForS3=constructS3FileName(fileName,fileNameID,dotFileExtension,datasourceId,hash);// This name is for the uploaded file, in the S3 Object Store.
StringfilenameForS3=constructS3FileName(fileName,openAireId,dotFileExtension,datasourceId,hash);// This name is for the uploaded file, in the S3 Object Store.
if(filenameForS3==null)// The error is logged inside.