// UrlsController/src/main/java/eu/openaire/urls_controller/util/S3ObjectStore.java
//
// 191 lines
// 8.0 KiB
// Java

package eu.openaire.urls_controller.util;
import io.minio.*;
import io.minio.messages.Bucket;
import io.minio.messages.Item;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Spring component that wraps a {@link MinioClient} for uploading files to, and deleting files from,
 * a single bucket of an S3-compatible ObjectStore.
 * <p>
 * The connection settings and the bucket name are injected from the
 * {@code services.pdfaggregation.controller.s3.*} properties. The client is created in {@link #init()},
 * which also creates the configured bucket when it does not exist yet.
 */
@Component
public class S3ObjectStore {

    private static final Logger logger = LoggerFactory.getLogger(S3ObjectStore.class);

    /** Prefix of every location returned by {@link #uploadToS3(String, String)}. */
    private static final String S3_PROTOCOL = "s3://";

    private static final String PDF_CONTENT_TYPE = "application/pdf";

    /**
     * Captures the file-extension (without the leading dot) of the last path-segment.
     * Path separators are excluded, so a dot inside a directory name is not mistaken for an extension.
     */
    private static final Pattern EXTENSION_PATTERN = Pattern.compile("\\.([^./\\\\]+)$");

    // NOTE(review): the original comment said it is useful for this to be "public", to test file-locations; the field is private.
    @Value("${services.pdfaggregation.controller.s3.endpoint}")
    private String endpoint;

    @Value("${services.pdfaggregation.controller.s3.accessKey}")
    private String accessKey;

    @Value("${services.pdfaggregation.controller.s3.secretKey}")
    private String secretKey;

    @Value("${services.pdfaggregation.controller.s3.region}")
    private String region;

    @Value("${services.pdfaggregation.controller.s3.bucketName}")
    private String bucketName;

    // Set true only for testing! Currently only referenced by the commented-out code inside "init()".
    @Value("${services.pdfaggregation.controller.s3.shouldEmptyBucket}")
    private boolean shouldEmptyBucket = false;

    @Value("${services.pdfaggregation.controller.s3.shouldShowAllS3Buckets}")
    private boolean shouldShowAllS3Buckets = false;

    // Currently only referenced by the commented-out code inside "init()".
    @Value("${services.pdfaggregation.controller.isTestEnvironment}")
    private boolean isTestEnvironment = false;

    private MinioClient minioClient;


    /**
     * Builds the {@link MinioClient} from the injected settings and makes sure the configured bucket exists.
     * When {@code shouldShowAllS3Buckets} is set, the names of all buckets are logged at debug-level
     * (a failure to list them is only logged, it does not abort the initialization).
     *
     * @throws Exception if the bucket-existence check or the bucket creation fails
     */
    @PostConstruct
    public void init() throws Exception {
        this.minioClient = MinioClient.builder()
                .endpoint(endpoint)
                .credentials(accessKey, secretKey)
                .region(region)
                .build();

        boolean bucketExists = minioClient.bucketExists(BucketExistsArgs.builder().bucket(bucketName).build());

        // Keep this commented-out to avoid objects-deletion by accident. The code is open-sourced, so it's easy to enable this ability if we really want it (e.g. for testing).
        /*if ( shouldEmptyBucket && isTestEnvironment && bucketExists )
            emptyBucket(bucketName, false);*/

        // Make the bucket, if it does not exist.
        if ( !bucketExists ) {
            logger.info("Bucket \"{}\" does not exist! Going to create it..", bucketName);
            minioClient.makeBucket(MakeBucketArgs.builder().bucket(bucketName).build());
        } else {
            logger.debug("Bucket \"{}\" already exists.", bucketName);
        }

        if ( shouldShowAllS3Buckets ) {
            try {
                List<Bucket> buckets = minioClient.listBuckets();
                logger.debug("The buckets in the S3 ObjectStore are:");
                for ( Bucket bucket : buckets ) {
                    logger.debug("{}", bucket.name());
                }
            } catch (Exception e) {
                // Listing the buckets is informational only, so do not fail the startup because of it.
                logger.warn("Could not listBuckets: {}", e.getMessage(), e);
            }
        }
    }


    /**
     * Uploads a local file to the configured bucket.
     *
     * @param fileObjKeyName the key (name) under which the object is stored in the bucket
     * @param fileFullPath   the path of the local file to upload
     * @return the location of the uploaded object, in the form {@code s3://<bucketName>/<fileObjKeyName>}
     * @throws Exception if the upload fails
     */
    public String uploadToS3(String fileObjKeyName, String fileFullPath) throws Exception {
        String contentType = resolveContentType(fileObjKeyName, fileFullPath);

        minioClient.uploadObject(UploadObjectArgs.builder()
                .bucket(bucketName)
                .object(fileObjKeyName)
                .filename(fileFullPath)
                .contentType(contentType)
                .build());

        // What if the fileObjKeyName already exists? (Default action from S3-Object-Store --> overwrite)
        // Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later, duplicate fileNames may appear.
        // However, the Controller uses the file-hash (instead of the duplicate -number), so it is 99.99% guaranteed that no overwrites will ever occur.

        // Be aware: This url works only if the access to the bucket is public.
        return S3_PROTOCOL + bucketName + "/" + fileObjKeyName;
    }


    /**
     * Picks the content-type for a file, based on its extension.
     * Only "pdf" is mapped for now; every other (or missing) extension falls back to the pdf content-type.
     */
    private static String resolveContentType(String fileObjKeyName, String fileFullPath) {
        Matcher extensionMatcher = EXTENSION_PATTERN.matcher(fileFullPath);
        if ( !extensionMatcher.find() ) {
            logger.warn("The file with key \"{}\" does not have a file-extension! Setting the \"pdf\"-mimeType.", fileObjKeyName);
            return PDF_CONTENT_TYPE;
        }

        String extension = extensionMatcher.group(1);	// The extension, without the leading dot.
        if ( extension.equalsIgnoreCase("pdf") )
            return PDF_CONTENT_TYPE;
        // TODO - Map other extensions to their content-types here.

        return PDF_CONTENT_TYPE;	// Default.
    }


    /**
     * Deletes all the objects of the given bucket and, optionally, the bucket itself.
     * The bucket is removed only if every object was deleted, as only an empty bucket can be removed.
     * Failures are logged, they are not thrown.
     *
     * @param bucketName         the bucket to empty
     * @param shouldDeleteBucket whether to also remove the bucket after emptying it
     */
    public void emptyBucket(String bucketName, boolean shouldDeleteBucket)
    {
        logger.warn("Going to {} bucket \"{}\"!", (shouldDeleteBucket ? "delete" : "empty"), bucketName);

        // First list the objects of the bucket. List recursively, so objects under key-prefixes ("directories") are included as well.
        Iterable<Result<Item>> results;
        try {
            results = minioClient.listObjects(ListObjectsArgs.builder().bucket(bucketName).recursive(true).build());
        } catch (Exception e) {
            logger.error("Could not retrieve the list of objects of bucket \"{}\"!", bucketName, e);
            return;
        }

        int countDeletedFiles = 0;
        int countFilesNotDeleted = 0;
        long totalDeletedSize = 0;

        // Then, delete the objects.
        for ( Result<Item> resultItem : results ) {
            Item item;
            try {
                item = resultItem.get();
            } catch (Exception e) {
                logger.error("Could not get the item-object of one of the S3-Objects returned from the bucket!", e);
                countFilesNotDeleted ++;
                continue;
            }

            long itemSize = item.size();
            if ( deleteFile(item.objectName(), bucketName) ) {
                countDeletedFiles ++;
                totalDeletedSize += itemSize;	// Count only the size of the files which were actually deleted.
            } else {	// The reason and for what object, is already logged.
                if ( shouldDeleteBucket )
                    logger.error("Cannot proceed with bucket deletion, since only an empty bucket can be removed!");
                countFilesNotDeleted ++;
            }
        }

        if ( shouldDeleteBucket ) {
            if ( countFilesNotDeleted == 0 ) {
                // Lastly, delete the empty bucket. We need to do this last, as in case it's not empty, we get an error!
                try {
                    minioClient.removeBucket(RemoveBucketArgs.builder().bucket(bucketName).build());
                    logger.info("Bucket \"{}\" was deleted!", bucketName);
                } catch (Exception e) {
                    logger.error("Bucket \"{}\" could not be deleted!", bucketName, e);
                }
            } else {
                logger.error("Cannot execute the \"removeBucket\" command for bucket \"{}\", as {} files failed to be deleted!", bucketName, countFilesNotDeleted);
            }
        } else if ( countFilesNotDeleted == 0 ) {
            logger.info("Bucket \"{}\" was emptied!", bucketName);
        } else {
            logger.error("Bucket \"{}\" was not fully emptied, as {} files failed to be deleted!", bucketName, countFilesNotDeleted);
        }

        logger.info("{} files were deleted, amounting to {} MB.", countDeletedFiles, ((totalDeletedSize / 1024) / 1024));
    }


    /**
     * Checks whether the given location points to the ObjectStore, i.e. whether it starts with the {@code s3://} prefix.
     *
     * @param location the location to check; may be {@code null}
     * @return {@code true} if the location starts with {@code s3://}, {@code false} otherwise (including for {@code null})
     */
    public boolean isLocationInStore(String location) {
        return (location != null) && location.startsWith(S3_PROTOCOL);
    }


    /**
     * Deletes a single object from the given bucket.
     *
     * @param fileObjKeyName the key (name) of the object to delete
     * @param bucketName     the bucket which contains the object
     * @return {@code true} if the removal request completed without an exception, {@code false} otherwise (the failure is logged)
     */
    public boolean deleteFile(String fileObjKeyName, String bucketName)
    {
        try {
            minioClient.removeObject(RemoveObjectArgs.builder().bucket(bucketName).object(fileObjKeyName).build());
        } catch (Exception e) {
            logger.error("Could not delete the file \"{}\" from the S3 ObjectStore, exception: {}", fileObjKeyName, e.getMessage(), e);
            return false;
        }
        return true;
    }
}