diff --git a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java index 415968e..ced1861 100644 --- a/src/main/java/eu/openaire/urls_controller/util/FileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/FileUtils.java @@ -52,10 +52,16 @@ public class FileUtils { public String baseFilesLocation; + public static final String workingDir = System.getProperty("user.dir") + File.separator; + + public FileUtils (@Value("${services.pdfaggregation.controller.baseFilesLocation}") String baseFilesLocation) { if ( !baseFilesLocation.endsWith(File.separator) ) baseFilesLocation += File.separator; + if ( !baseFilesLocation.startsWith(File.separator) ) + baseFilesLocation = workingDir + baseFilesLocation; + this.baseFilesLocation = baseFilesLocation; } @@ -256,6 +262,7 @@ public class FileUtils { try { // Create this batch-directory. Path curBatchPath = Files.createDirectories(Paths.get(targetDirectory)); + // The base-directory will be created along with the first batch directory. // Unzip the file. Iterate over the PDFs and upload each one of them and get the S3-Url String zipFileFullPath = targetDirectory + "fullTexts_" + assignmentsBatchCounter + "_" + batchCounter + ".zip"; diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java index b2c3a16..095a711 100644 --- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java +++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java @@ -166,6 +166,8 @@ public class ParquetFileUtils { if ( uploadFullTextsResponse == FileUtils.UploadFullTextsResponse.successful ) { + // At this point we know there was no problem with the full-texts, but we do not know if at least one full-text was retrieved. + if ( (payloadsSchema == null) // Parse the schema if it's not already parsed. && ((payloadsSchema = parseSchema(payloadSchemaFilePath)) == null ) ) { logger.error("Nothing can be done without the payloadsSchema! Exiting.."); // The cause is already logged inside the above method. diff --git a/src/main/java/eu/openaire/urls_controller/util/S3ObjectStore.java b/src/main/java/eu/openaire/urls_controller/util/S3ObjectStore.java index f88ef06..b1a402d 100644 --- a/src/main/java/eu/openaire/urls_controller/util/S3ObjectStore.java +++ b/src/main/java/eu/openaire/urls_controller/util/S3ObjectStore.java @@ -37,6 +37,7 @@ public class S3ObjectStore { private MinioClient minioClient; + @PostConstruct public void init() throws Exception { this.minioClient = MinioClient.builder().endpoint(endpoint).credentials(accessKey, secretKey).region(region).build(); @@ -70,6 +71,7 @@ public class S3ObjectStore { } } + private final Pattern EXTENSION_PATTERN = Pattern.compile("(\\.[^.]+)$"); /** @@ -113,6 +115,7 @@ public class S3ObjectStore { return s3Url; } + public void emptyBucket(String bucketName, boolean shouldDeleteBucket) throws Exception { logger.warn("Going to " + (shouldDeleteBucket ? "delete" : "empty") + " bucket \"" + bucketName + "\""); @@ -120,12 +123,16 @@ public class S3ObjectStore { Iterable> results = minioClient.listObjects(ListObjectsArgs.builder().bucket(bucketName).build()); // Then, delete the objects. - for ( Result resultItem : results ) + for ( Result resultItem : results ) { try { - deleteFile(resultItem.get().objectName(), bucketName); + if ( !deleteFile(resultItem.get().objectName(), bucketName) ) { + logger.error("Cannot proceed with bucket deletion, since only an empty bucket can be removed!"); + return; + } } catch (Exception e) { logger.warn("Could not remove " + resultItem.get().objectName()); } + } if ( shouldDeleteBucket ) { // Lastly, delete the empty bucket. @@ -135,11 +142,21 @@ public class S3ObjectStore { logger.info("Bucket " + bucketName + " was " + (shouldDeleteBucket ? "deleted!" : "emptied!")); } + public boolean isLocationInStore(String location) { return location.startsWith(s3Protocol); } - private void deleteFile(String fileObjKeyName, String bucketName) throws Exception { - minioClient.removeObject(RemoveObjectArgs.builder().bucket(bucketName).object(fileObjKeyName).build()); + + public boolean deleteFile(String fileObjKeyName, String bucketName) + { + try { + minioClient.removeObject(RemoveObjectArgs.builder().bucket(bucketName).object(fileObjKeyName).build()); + } catch (Exception e) { + logger.error("Could not delete the file \"" + fileObjKeyName + "\" from the S3 ObjectStore, exception: " + e.getMessage(), e); + return false; + } + return true; } + } diff --git a/src/main/java/eu/openaire/urls_controller/util/UriBuilder.java b/src/main/java/eu/openaire/urls_controller/util/UriBuilder.java index b4980c6..4bf59c7 100644 --- a/src/main/java/eu/openaire/urls_controller/util/UriBuilder.java +++ b/src/main/java/eu/openaire/urls_controller/util/UriBuilder.java @@ -56,17 +56,25 @@ public class UriBuilder { { String publicIpAddress = ""; HttpURLConnection conn = null; + String urlString = "https://api.ipify.org/"; try { - conn = (HttpURLConnection) new URL("https://api.ipify.org/").openConnection(); + conn = (HttpURLConnection) new URL(urlString).openConnection(); conn.setConnectTimeout(60_000); conn.setReadTimeout(60_000); conn.setRequestMethod("GET"); conn.connect(); + + int responseCode = conn.getResponseCode(); + if ( responseCode != 200 ) { + logger.warn("Cannot get the publicIP address for this machine, as \"" + urlString + "\" returned the HTTP-error-code: " + responseCode); + return null; + } + try ( BufferedReader bf = new BufferedReader(new InputStreamReader(conn.getInputStream()))) { publicIpAddress = bf.readLine().trim(); } } catch (Exception e) { - logger.warn("Cannot get the publicIP address for this machine!", e); + logger.warn("Cannot get the publicIP address for this machine, from \"" + urlString + "\"!", e); return null; } finally { if ( conn != null )