From a524375656d731835bc12d1235e2623fb9ebe64c Mon Sep 17 00:00:00 2001
From: LSmyrnaios
Date: Fri, 4 Aug 2023 15:30:41 +0300
Subject: [PATCH] - Create the HDFS-subDirs before generating "callableTasks"
 for creating and uploading the parquetFiles.

- Delete gradle .zip file after installation.

---
 installAndRun.sh                                            | 1 +
 .../urls_controller/controllers/BulkImportController.java  | 2 +-
 .../openaire/urls_controller/services/UrlsServiceImpl.java | 6 +++---
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/installAndRun.sh b/installAndRun.sh
index b1f320b..9f712ad 100755
--- a/installAndRun.sh
+++ b/installAndRun.sh
@@ -35,6 +35,7 @@ if [[ justInstall -eq 0 ]]; then
 		echo -e "\nAsking for sudo, in order to install 'gradle'..\n"
 		sudo mkdir /opt/gradle
 		sudo apt install -y unzip && sudo unzip -d /opt/gradle gradle-${gradleVersion}-bin.zip
+		sudo rm -rf gradle-${gradleVersion}-bin.zip
 		#ls /opt/gradle/gradle-${gradleVersion}	# For debugging installation
 	fi
 
diff --git a/src/main/java/eu/openaire/urls_controller/controllers/BulkImportController.java b/src/main/java/eu/openaire/urls_controller/controllers/BulkImportController.java
index 9916d82..f8931d2 100644
--- a/src/main/java/eu/openaire/urls_controller/controllers/BulkImportController.java
+++ b/src/main/java/eu/openaire/urls_controller/controllers/BulkImportController.java
@@ -201,7 +201,7 @@ public class BulkImportController {
 				bulkImportService.bulkImportFullTextsFromDirectory(bulkImportReport, finalRelativeBulkImportDir, finalBulkImportDir, givenDir, provenance, bulkImportSource, shouldDeleteFilesOnFinish)
 		);
 
-		return ResponseEntity.ok().body(new BulkImportResponse(msg, bulkImportReportID));
+		return ResponseEntity.ok().body(new BulkImportResponse(msg, bulkImportReportID));	// The response is automatically serialized to json and it's of type "application/json".
 	}
 
 
diff --git a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
index 5768c8b..119e879 100644
--- a/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
+++ b/src/main/java/eu/openaire/urls_controller/services/UrlsServiceImpl.java
@@ -311,17 +311,17 @@ public class UrlsServiceImpl implements UrlsService {
 
 		logger.debug("Going to write the results in the parquet files, then upload them to HDFS, and then load them into the database's tables. For batch-assignments_" + curReportAssignmentsCounter);
 
-		List<Callable<Void>> callableTasks = parquetFileUtils.getTasksForCreatingAndUploadingParquetFiles(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, localParquetPath, uploadFullTextsResponse);
-
 		// Create HDFS subDirs for these assignments. Other background threads handling other assignments will not interfere with loading of parquetFiles to the DB tables.
 		String endingMkDirAndParams = curReportAssignmentsCounter + "/" + parquetFileUtils.mkDirsAndParams;
 		if ( !parquetFileUtils.applyHDFOperation(parquetFileUtils.webHDFSBaseUrl + parquetFileUtils.parquetHDFSDirectoryPathAttempts + endingMkDirAndParams)
-				|| !parquetFileUtils.applyHDFOperation(parquetFileUtils.webHDFSBaseUrl + parquetFileUtils.parquetHDFSDirectoryPathPayloadsAggregated + endingMkDirAndParams) )
+				|| !parquetFileUtils.applyHDFOperation(parquetFileUtils.webHDFSBaseUrl + parquetFileUtils.parquetHDFSDirectoryPathPayloadsAggregated + endingMkDirAndParams) ) {
 			postReportResultToWorker(curWorkerId, curReportAssignmentsCounter, "Error when creating the HDFS sub-directories for assignments_" + curReportAssignmentsCounter);
 			return false;
 		}
 
+		List<Callable<Void>> callableTasks = parquetFileUtils.getTasksForCreatingAndUploadingParquetFiles(urlReports, sizeOfUrlReports, curReportAssignmentsCounter, localParquetPath, uploadFullTextsResponse);
+
 		boolean hasAttemptParquetFileProblem = false;
 		boolean hasPayloadParquetFileProblem = false;