package eu.openaire.urls_controller.util;

import io.minio.*;
import io.minio.messages.Bucket;
import io.minio.messages.Item;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Spring component that wraps a {@link MinioClient} bound to one configured bucket.
 * <p>
 * On startup ({@link #init()}) it builds the client from the injected properties and creates the bucket
 * if it does not exist yet. Afterwards it offers helpers to upload a file, delete a single object and
 * empty (optionally also remove) a bucket.
 */
@Component
public class S3ObjectStore {

    private static final Logger logger = LoggerFactory.getLogger(S3ObjectStore.class);

    /** Scheme-prefix of the locations returned by {@link #uploadToS3(String, String)}. */
    private static final String S3_PROTOCOL = "s3://";

    /** The content-type used for pdf-files and, for now, as the default for everything else. */
    private static final String PDF_CONTENT_TYPE = "application/pdf";

    /** Captures the last dot-prefixed segment of a path (e.g. ".pdf"); group 1 includes the leading dot. */
    private static final Pattern EXTENSION_PATTERN = Pattern.compile("(\\.[^.]+)$");

    @Value("${services.pdfaggregation.controller.s3.endpoint}")
    private String endpoint = null;    // This is useful to be "public", to test file-locations.

    @Value("${services.pdfaggregation.controller.s3.accessKey}")
    private String accessKey = null;

    @Value("${services.pdfaggregation.controller.s3.secretKey}")
    private String secretKey = null;

    @Value("${services.pdfaggregation.controller.s3.region}")
    private String region = null;

    @Value("${services.pdfaggregation.controller.s3.bucketName}")
    private String bucketName = null;

    // Only referenced by the (deliberately) commented-out bucket-emptying code inside "init()".
    @Value("${services.pdfaggregation.controller.s3.shouldEmptyBucket}")
    private boolean shouldEmptyBucket = false;    // Set true only for testing!

    @Value("${services.pdfaggregation.controller.s3.shouldShowAllS3Buckets}")
    private boolean shouldShowAllS3Buckets = false;

    // Only referenced by the (deliberately) commented-out bucket-emptying code inside "init()".
    @Value("${services.pdfaggregation.controller.isTestEnvironment}")
    private boolean isTestEnvironment = false;

    private MinioClient minioClient;


    /**
     * Builds the {@link MinioClient} from the injected properties, makes sure the configured bucket exists
     * (creating it when missing) and, if "shouldShowAllS3Buckets" is set, logs the names of all the buckets.
     *
     * @throws Exception if the client cannot check for, or create, the configured bucket
     */
    @PostConstruct
    public void init() throws Exception {
        this.minioClient = MinioClient.builder()
                .endpoint(endpoint)
                .credentials(accessKey, secretKey)
                .region(region)
                .build();

        boolean bucketExists = minioClient.bucketExists(BucketExistsArgs.builder().bucket(bucketName).build());

        // Keep this commented-out to avoid objects-deletion by accident. The code is open-sourced, so it's easy to enable this ability if we really want it (e.g. for testing).
        /*if ( shouldEmptyBucket && isTestEnvironment && bucketExists )
            emptyBucket(bucketName, false);*/

        // Make the bucket, if not exist.
        if ( !bucketExists ) {
            logger.info("Bucket \"{}\" does not exist! Going to create it..", bucketName);
            minioClient.makeBucket(MakeBucketArgs.builder().bucket(bucketName).build());
        } else {
            logger.debug("Bucket \"{}\" already exists.", bucketName);
        }

        if ( shouldShowAllS3Buckets ) {
            // Listing the buckets is informational only, so a failure here must not abort the startup.
            try {
                List<Bucket> buckets = minioClient.listBuckets();
                logger.debug("The buckets in the S3 ObjectStore are:");
                for ( Bucket bucket : buckets ) {
                    logger.debug(bucket.name());
                }
            } catch (Exception e) {
                logger.warn("Could not listBuckets: {}", e.getMessage(), e);
            }
        }
    }


    /**
     * Uploads a local file to the configured bucket.
     *
     * @param fileObjKeyName the key (name) which the object will have inside the bucket
     * @param fileFullPath the path of the local file to upload
     * @return the "s3://bucketName/fileObjKeyName" url of the uploaded file
     * @throws Exception if the upload fails
     */
    public String uploadToS3(String fileObjKeyName, String fileFullPath) throws Exception {
        String contentType = resolveContentType(fileObjKeyName, fileFullPath);

        minioClient.uploadObject(UploadObjectArgs.builder()
                .bucket(bucketName)
                .object(fileObjKeyName)
                .filename(fileFullPath)
                .contentType(contentType)
                .build());

        // What if the fileObjKeyName already exists? (Default action from S3-Object-Store --> overwrite)
        // Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later, duplicate fileNames may appear.
        // However, the Controller uses the file-hash (instead of the duplicate -number), so it is 99.99% guaranteed that no overwrites will ever occur.

        // Be aware: This url works only if the access to the bucket is public.
        String s3Url = S3_PROTOCOL + bucketName + "/" + fileObjKeyName;
        //logger.debug("Uploaded file \"" + fileObjKeyName + "\". The s3Url is: " + s3Url);
        return s3Url;
    }


    /**
     * Chooses the content-type of a file, based on the extension of its path.
     * Currently every file ends up as "application/pdf"; a missing extension is additionally logged.
     *
     * @param fileObjKeyName the object-key, used only for the log-message
     * @param fileFullPath the path from which the extension is extracted
     * @return the content-type to use for the upload
     */
    private static String resolveContentType(String fileObjKeyName, String fileFullPath) {
        Matcher extensionMatcher = EXTENSION_PATTERN.matcher(fileFullPath);
        if ( !extensionMatcher.find() ) {
            logger.warn("The file with key \"{}\" does not have a file-extension! Setting the \"pdf\"-mimeType.", fileObjKeyName);
            return PDF_CONTENT_TYPE;
        }

        // The captured extension includes the leading dot, so it has to be compared against ".pdf", not "pdf".
        String extension = extensionMatcher.group(1);
        if ( ".pdf".equalsIgnoreCase(extension) ) {
            return PDF_CONTENT_TYPE;
        }
        // TODO - map other extensions to their own content-types here.
        return PDF_CONTENT_TYPE;    // Default.
    }


    /**
     * Deletes the objects listed for the given bucket and, if requested and no deletion failed, removes the bucket itself.
     * Failures are logged and counted; they never propagate to the caller.
     *
     * @param bucketName the bucket to empty
     * @param shouldDeleteBucket whether the bucket should also be removed, after it has been emptied
     */
    public void emptyBucket(String bucketName, boolean shouldDeleteBucket) {
        logger.warn("Going to {} bucket \"{}\"!", (shouldDeleteBucket ? "delete" : "empty"), bucketName);

        // First list the objects of the bucket.
        // NOTE(review): no "recursive" option is set on the listing; if object-keys may contain "/", confirm that the default listing covers them.
        Iterable<Result<Item>> results;
        try {
            results = minioClient.listObjects(ListObjectsArgs.builder().bucket(bucketName).build());
        } catch (Exception e) {
            logger.error("Could not retrieve the list of objects of bucket \"{}\"!", bucketName, e);
            return;
        }

        int countDeletedFiles = 0;
        int countFilesNotDeleted = 0;
        long totalSize = 0;

        // Then, delete the objects.
        for ( Result<Item> resultItem : results ) {
            Item item;
            try {
                item = resultItem.get();
            } catch (Exception e) {
                logger.error("Could not get the item-object of one of the S3-Objects returned from the bucket!", e);
                countFilesNotDeleted ++;
                continue;
            }

            if ( deleteFile(item.objectName(), bucketName) ) {
                countDeletedFiles ++;
                totalSize += item.size();    // Count only the size of what was really deleted.
            } else {    // The reason and for what object, is already logged.
                countFilesNotDeleted ++;
                if ( shouldDeleteBucket ) {
                    logger.error("Cannot proceed with bucket deletion, since only an empty bucket can be removed!");
                }
            }
        }

        if ( shouldDeleteBucket ) {
            if ( countFilesNotDeleted == 0 ) {
                // Lastly, delete the empty bucket. We need to do this last, as in case it's not empty, we get an error!
                try {
                    minioClient.removeBucket(RemoveBucketArgs.builder().bucket(bucketName).build());
                    logger.info("Bucket \"{}\" was deleted!", bucketName);
                } catch (Exception e) {
                    logger.error("Bucket \"" + bucketName + "\" could not be deleted!", e);
                }
            } else {
                logger.error("Cannot execute the \"removeBucket\" command for bucket \"{}\", as {} files failed to be deleted!", bucketName, countFilesNotDeleted);
            }
        } else if ( countFilesNotDeleted == 0 ) {
            logger.info("Bucket \"{}\" was emptied!", bucketName);
        } else {
            logger.warn("Bucket \"{}\" was not fully emptied, as {} files failed to be deleted!", bucketName, countFilesNotDeleted);
        }

        logger.info("{} files were deleted, amounting to {} MB.", countDeletedFiles, ((totalSize / 1024) / 1024));
    }


    /**
     * Checks whether the given location points to this kind of store, i.e. whether it starts with "s3://".
     *
     * @param location the location to check; may be null
     * @return true if the location is not null and starts with the "s3://" prefix, false otherwise
     */
    public boolean isLocationInStore(String location) {
        return (location != null) && location.startsWith(S3_PROTOCOL);
    }


    /**
     * Deletes a single object from the given bucket.
     *
     * @param fileObjKeyName the key (name) of the object to delete
     * @param bucketName the bucket which holds the object
     * @return true if the remove-call completed without an exception, false otherwise (the failure is logged)
     */
    public boolean deleteFile(String fileObjKeyName, String bucketName) {
        try {
            minioClient.removeObject(RemoveObjectArgs.builder().bucket(bucketName).object(fileObjKeyName).build());
        } catch (Exception e) {
            logger.error("Could not delete the file \"{}\" from the S3 ObjectStore, exception: {}", fileObjKeyName, e.getMessage(), e);
            return false;
        }
        return true;
    }
}