2022-01-30 21:14:52 +01:00
package eu.openaire.urls_controller.util ;
import io.minio.* ;
import io.minio.messages.Bucket ;
import io.minio.messages.Item ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
import org.springframework.beans.factory.annotation.Value ;
import org.springframework.stereotype.Component ;
import javax.annotation.PostConstruct ;
import java.util.List ;
import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
@Component
public class S3ObjectStore {
private static final Logger logger = LoggerFactory . getLogger ( S3ObjectStore . class ) ;
2022-01-31 03:17:16 +01:00
private String s3Protocol = " s3:// " ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.endpoint} " )
2022-01-30 21:14:52 +01:00
private String endpoint = null ; // This is useful to be "public", to test file-locations.
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.accessKey} " )
2022-01-30 21:14:52 +01:00
private String accessKey = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.secretKey} " )
2022-01-30 21:14:52 +01:00
private String secretKey = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.region} " )
2022-01-30 21:14:52 +01:00
private String region = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.bucketName} " )
2022-01-30 21:14:52 +01:00
private String bucketName = null ;
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.shouldEmptyBucket} " )
2022-01-30 21:14:52 +01:00
private boolean shouldEmptyBucket = false ; // Set true only for testing!
2022-01-31 12:49:14 +01:00
@Value ( " ${services.pdfaggregation.controller.s3.shouldShowAllS3Buckets} " )
2022-01-30 21:14:52 +01:00
private boolean shouldShowAllS3Buckets = false ;
private MinioClient minioClient ;
@PostConstruct
public void init ( ) throws Exception {
this . minioClient = MinioClient . builder ( ) . endpoint ( endpoint ) . credentials ( accessKey , secretKey ) . region ( region ) . build ( ) ;
boolean bucketExists = minioClient . bucketExists ( BucketExistsArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
// Keep this commented-out to avoid objects-deletion by accident. The code is open-sourced, so it's easy to enable this ability if we really want it (e.g. for testing).
if ( bucketExists & & shouldEmptyBucket ) {
emptyBucket ( bucketName , false ) ;
//throw new RuntimeException("stop just for test!");
}
// Make the bucket, if not exist.
2022-02-02 19:19:46 +01:00
if ( ! bucketExists ) {
logger . info ( " Bucket \" " + bucketName + " \" does not exist! Going to create it.. " ) ;
minioClient . makeBucket ( MakeBucketArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
} else
logger . debug ( " Bucket \" " + bucketName + " \" already exists. " ) ;
2022-01-30 21:14:52 +01:00
if ( shouldShowAllS3Buckets ) {
List < Bucket > buckets = null ;
try {
buckets = minioClient . listBuckets ( ) ;
logger . debug ( " The buckets in the S3 ObjectStore are: " ) ;
for ( Bucket bucket : buckets ) {
logger . debug ( bucket . name ( ) ) ;
}
} catch ( Exception e ) {
logger . warn ( " Could not listBuckets: " + e . getMessage ( ) ) ;
}
}
}
private final Pattern EXTENSION_PATTERN = Pattern . compile ( " ( \\ .[^.]+)$ " ) ;
/ * *
* @param fileObjKeyName = " **File object key name** " ;
* @param fileFullPath = " **Path of the file to upload** " ;
* @return the url of the uploaded file
* /
public String uploadToS3 ( String fileObjKeyName , String fileFullPath ) throws Exception {
String contentType = null ;
// Take the Matcher to retrieve the extension.
Matcher extensionMatcher = EXTENSION_PATTERN . matcher ( fileFullPath ) ;
if ( extensionMatcher . find ( ) ) {
String extension = null ;
if ( ( extension = extensionMatcher . group ( 0 ) ) = = null )
contentType = " application/pdf " ;
else {
if ( extension . equals ( " pdf " ) )
contentType = " application/pdf " ;
2022-02-02 19:19:46 +01:00
/*else if ( */ /* TODO - other-extension-match */ / * )
contentType = " application/EXTENSION " ; * /
2022-01-30 21:14:52 +01:00
else
2022-02-02 19:19:46 +01:00
contentType = " application/pdf " ; // Default.
2022-01-30 21:14:52 +01:00
}
} else {
logger . warn ( " The file with key \" " + fileObjKeyName + " \" does not have a file-extension! Setting the \" pdf \" -mimeType. " ) ;
contentType = " application/pdf " ;
}
minioClient . uploadObject ( UploadObjectArgs . builder ( )
. bucket ( bucketName )
. object ( fileObjKeyName ) . filename ( fileFullPath )
. contentType ( contentType ) . build ( ) ) ;
2022-04-11 20:15:22 +02:00
// What if the fileObjKeyName already exists? (Default action from S3-Object-Store --> overwrite)
// Each Worker handles some of these cases, but in case of id-urls splitting between different workers or re-attempting some temporarily faulty urls later, duplicate fileNames may appear.
// However, the Controller uses the file-hash (instead of the duplicate -number), so it is 99.99% guaranteed that no overwrites will ever occur.
2022-01-30 21:14:52 +01:00
2022-01-31 03:17:16 +01:00
String s3Url = s3Protocol + bucketName + " / " + fileObjKeyName ; // Be aware: This url works only if the access to the bucket is public.
2022-01-30 21:14:52 +01:00
//logger.debug("Uploaded file \"" + fileObjKeyName + "\". The s3Url is: " + s3Url);
return s3Url ;
}
public void emptyBucket ( String bucketName , boolean shouldDeleteBucket ) throws Exception {
logger . warn ( " Going to " + ( shouldDeleteBucket ? " delete " : " empty " ) + " bucket \" " + bucketName + " \" " ) ;
// First list the objects of the bucket.
Iterable < Result < Item > > results = minioClient . listObjects ( ListObjectsArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
// Then, delete the objects.
for ( Result < Item > resultItem : results )
try {
deleteFile ( resultItem . get ( ) . objectName ( ) , bucketName ) ;
} catch ( Exception e ) {
logger . warn ( " Could not remove " + resultItem . get ( ) . objectName ( ) ) ;
}
if ( shouldDeleteBucket ) {
// Lastly, delete the empty bucket.
minioClient . removeBucket ( RemoveBucketArgs . builder ( ) . bucket ( bucketName ) . build ( ) ) ;
}
2022-02-02 19:19:46 +01:00
logger . info ( " Bucket " + bucketName + " was " + ( shouldDeleteBucket ? " deleted! " : " emptied! " ) ) ;
2022-01-30 21:14:52 +01:00
}
2022-02-02 19:19:46 +01:00
public boolean isLocationInStore ( String location ) {
return location . startsWith ( s3Protocol ) ;
2022-01-30 21:14:52 +01:00
}
private void deleteFile ( String fileObjKeyName , String bucketName ) throws Exception {
minioClient . removeObject ( RemoveObjectArgs . builder ( ) . bucket ( bucketName ) . object ( fileObjKeyName ) . build ( ) ) ;
}
}