// 2022-01-30 21:14:52 +01:00
package eu.openaire.urls_controller.util ;
import io.minio.* ;
import io.minio.messages.Bucket ;
import io.minio.messages.Item ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.springframework.beans.factory.annotation.Value ;
import org.springframework.stereotype.Component ;
import javax.annotation.PostConstruct ;
import java.util.List ;
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
@Component
public class S3ObjectStore {
private static final Logger logger = LoggerFactory . getLogger ( S3ObjectStore . class ) ;
2022-12-08 15:28:41 +01:00
private final String s3Protocol = " s3:// " ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.endpoint} " )
2022-01-30 21:14:52 +01:00
private String endpoint = null ; // This is useful to be "public", to test file-locations.
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.accessKey} " )
2022-01-30 21:14:52 +01:00
private String accessKey = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.secretKey} " )
2022-01-30 21:14:52 +01:00
private String secretKey = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.region} " )
2022-01-30 21:14:52 +01:00
private String region = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.bucketName} " )
2022-01-30 21:14:52 +01:00
private String bucketName = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.shouldEmptyBucket} " )
2022-01-30 21:14:52 +01:00
private boolean shouldEmptyBucket = false ; // Set true only for testing!
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.shouldShowAllS3Buckets} " )
2022-01-30 21:14:52 +01:00
private boolean shouldShowAllS3Buckets = false ;
2023-02-01 15:42:22 +01:00
@Value ( " ${services.pdfaggregation.controller.isTestEnvironment} " )
private boolean isTestEnvironment = false ;
2022-01-30 21:14:52 +01:00
private MinioClient minioClient ;
2023-01-09 14:44:53 +01:00
2022-01-30 21:14:52 +01:00
@PostConstruct
public void init ( ) throws Exception {
this . minioClient = MinioClient . builder ( ) . endpoint ( endpoint ) . credentials ( accessKey , secretKey ) . region ( region ) . build ( ) ;
boolean bucketExists = minioClient . bucketExists ( BucketExistsArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
// Keep this commented-out to avoid objects-deletion by accident. The code is open-sourced, so it's easy to enable this ability if we really want it (e.g. for testing).
2024-04-26 11:54:00 +02:00
/ * if ( shouldEmptyBucket & & isTestEnvironment & & bucketExists )
emptyBucket ( bucketName , false ) ; * /
2022-01-30 21:14:52 +01:00
// Make the bucket, if not exist.
2022-02-02 19:19:46 +01:00
if ( ! bucketExists ) {
logger . info ( " Bucket \" " + bucketName + " \" does not exist! Going to create it.. " ) ;
minioClient . makeBucket ( MakeBucketArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
} else
logger . debug ( " Bucket \" " + bucketName + " \" already exists. " ) ;
2022-01-30 21:14:52 +01:00
if ( shouldShowAllS3Buckets ) {
List < Bucket > buckets = null ;
try {
buckets = minioClient . listBuckets ( ) ;
logger . debug ( " The buckets in the S3 ObjectStore are: " ) ;
for ( Bucket bucket : buckets ) {
logger . debug ( bucket . name ( ) ) ;
}
} catch ( Exception e ) {
logger . warn ( " Could not listBuckets: " + e . getMessage ( ) ) ;
}
}
}
2023-01-09 14:44:53 +01:00
2023-02-01 15:42:22 +01:00
private static final Pattern EXTENSION_PATTERN = Pattern . compile ( " ( \\ .[^.]+)$ " ) ;
2022-01-30 21:14:52 +01:00
/ * *
* @param fileObjKeyName = " **File object key name** " ;
* @param fileFullPath = " **Path of the file to upload** " ;
* @return the url of the uploaded file
* /
public String uploadToS3 ( String fileObjKeyName , String fileFullPath ) throws Exception {
String contentType = null ;
// Take the Matcher to retrieve the extension.
Matcher extensionMatcher = EXTENSION_PATTERN . matcher ( fileFullPath ) ;
if ( extensionMatcher . find ( ) ) {
String extension = null ;
if ( ( extension = extensionMatcher . group ( 0 ) ) = = null )
contentType = " application/pdf " ;
else {
if ( extension . equals ( " pdf " ) )
contentType = " application/pdf " ;
2022-02-02 19:19:46 +01:00
/*else if ( */ /* TODO - other-extension-match */ / * )
contentType = " application/EXTENSION " ; * /
2022-01-30 21:14:52 +01:00
else
2022-02-02 19:19:46 +01:00
contentType = " application/pdf " ; // Default.
2022-01-30 21:14:52 +01:00
}
} else {
logger . warn ( " The file with key \" " + fileObjKeyName + " \" does not have a file-extension! Setting the \" pdf \" -mimeType. " ) ;
contentType = " application/pdf " ;
}
minioClient . uploadObject ( UploadObjectArgs . builder ( )
. bucket ( bucketName )
. object ( fileObjKeyName ) . filename ( fileFullPath )
. contentType ( contentType ) . build ( ) ) ;
2022-04-11 20:15:22 +02:00
// What if the fileObjKeyName already exists? (Default action from S3-Object-Store --> overwrite)
// Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later, duplicate fileNames may appear.
// However, the Controller uses the file-hash (instead of the duplicate -number), so it is 99.99% guaranteed that no overwrites will ever occur.
2022-01-30 21:14:52 +01:00
2022-01-31 03:17:16 +01:00
String s3Url = s3Protocol + bucketName + " / " + fileObjKeyName ; // Be aware: This url works only if the access to the bucket is public.
2022-01-30 21:14:52 +01:00
//logger.debug("Uploaded file \"" + fileObjKeyName + "\". The s3Url is: " + s3Url);
return s3Url ;
}
2023-01-09 14:44:53 +01:00
2024-03-11 12:34:38 +01:00
public void emptyBucket ( String bucketName , boolean shouldDeleteBucket )
{
2023-02-01 15:42:22 +01:00
logger . warn ( " Going to " + ( shouldDeleteBucket ? " delete " : " empty " ) + " bucket \" " + bucketName + " \" ! " ) ;
2022-01-30 21:14:52 +01:00
// First list the objects of the bucket.
2024-03-11 15:17:32 +01:00
Iterable < Result < Item > > results ;
try {
results = minioClient . listObjects ( ListObjectsArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
} catch ( Exception e ) {
logger . error ( " Could not retrieve the list of objects of bucket \" " + bucketName + " \" ! " ) ;
return ;
}
2022-01-30 21:14:52 +01:00
2024-03-11 12:34:38 +01:00
int countDeletedFiles = 0 ;
int countFilesNotDeleted = 0 ;
long totalSize = 0 ;
Item item ;
2022-01-30 21:14:52 +01:00
// Then, delete the objects.
2023-01-09 14:44:53 +01:00
for ( Result < Item > resultItem : results ) {
2022-01-30 21:14:52 +01:00
try {
2024-03-11 12:34:38 +01:00
item = resultItem . get ( ) ;
2022-01-30 21:14:52 +01:00
} catch ( Exception e ) {
2024-03-11 12:34:38 +01:00
logger . error ( " Could not get the item-object of one of the S3-Objects returned from the bucket! " , e ) ;
countFilesNotDeleted + + ;
continue ;
2022-01-30 21:14:52 +01:00
}
2024-03-11 12:34:38 +01:00
totalSize + = item . size ( ) ;
if ( ! deleteFile ( item . objectName ( ) , bucketName ) ) { // The reason and for what object, is already logged.
logger . error ( " Cannot proceed with bucket deletion, since only an empty bucket can be removed! " ) ;
countFilesNotDeleted + + ;
} else
countDeletedFiles + + ;
2023-01-09 14:44:53 +01:00
}
2022-01-30 21:14:52 +01:00
if ( shouldDeleteBucket ) {
2024-03-11 12:34:38 +01:00
if ( countFilesNotDeleted = = 0 ) {
// Lastly, delete the empty bucket. We need to do this last, as in case it's not empty, we get an error!
try {
minioClient . removeBucket ( RemoveBucketArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
logger . info ( " Bucket \" " + bucketName + " \" was deleted! " ) ;
} catch ( Exception e ) {
logger . error ( " Bucket \" " + bucketName + " \" could not be deleted! " , e ) ;
}
} else
logger . error ( " Cannot execute the \" removeBucket \" command for bucket \" " + bucketName + " \" , as " + countFilesNotDeleted + " files failed to be deleted! " ) ;
} else
logger . info ( " Bucket \" " + bucketName + " \" was emptied! " ) ;
2022-02-02 19:19:46 +01:00
2024-03-11 12:34:38 +01:00
logger . info ( countDeletedFiles + " files were deleted, amounting to " + ( ( totalSize / 1024 ) / 1024 ) + " MB. " ) ;
2022-01-30 21:14:52 +01:00
}
2023-01-09 14:44:53 +01:00
2022-02-02 19:19:46 +01:00
public boolean isLocationInStore ( String location ) {
return location . startsWith ( s3Protocol ) ;
2022-01-30 21:14:52 +01:00
}
2023-01-09 14:44:53 +01:00
public boolean deleteFile ( String fileObjKeyName , String bucketName )
{
try {
minioClient . removeObject ( RemoveObjectArgs . builder ( ) . bucket ( bucketName ) . object ( fileObjKeyName ) . build ( ) ) ;
} catch ( Exception e ) {
logger . error ( " Could not delete the file \" " + fileObjKeyName + " \" from the S3 ObjectStore, exception: " + e . getMessage ( ) , e ) ;
return false ;
}
return true ;
2022-01-30 21:14:52 +01:00
}
2023-01-09 14:44:53 +01:00
2022-01-30 21:14:52 +01:00
}