diff --git a/build.gradle b/build.gradle
index 909fdf3..c431bba 100644
--- a/build.gradle
+++ b/build.gradle
@@ -6,7 +6,7 @@ plugins {
 
 java {
 	group = 'eu.openaire.urls_controller'
-	version = '2.7.3'
+	version = '2.7.4-SNAPSHOT'
 	sourceCompatibility = JavaVersion.VERSION_1_8
 }
 
@@ -112,9 +112,6 @@ dependencies {
 	implementation 'org.apache.thrift:libthrift:0.17.0'	// Newer versions (>=0.18.X) are not compatible with JAVA 8.
 
 	implementation 'com.fasterxml.woodstox:woodstox-core:6.6.2'
 
-	// https://mvnrepository.com/artifact/org.json/json
-	implementation 'org.json:json:20240303'	// This is used only in "ParquetFileUtils.createRemoteParquetDirectories()". TODO - Replace it with "gson".
-
 	// https://mvnrepository.com/artifact/com.google.code.gson/gson
 	implementation 'com.google.code.gson:gson:2.11.0'
diff --git a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
index dc63ae5..aa09410 100644
--- a/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
+++ b/src/main/java/eu/openaire/urls_controller/util/ParquetFileUtils.java
@@ -1,6 +1,10 @@
 package eu.openaire.urls_controller.util;
 
 import com.google.common.collect.Lists;
+import com.google.gson.Gson;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
 import eu.openaire.urls_controller.configuration.DatabaseConnector;
 import eu.openaire.urls_controller.models.Error;
 import eu.openaire.urls_controller.models.*;
@@ -15,9 +19,6 @@ import org.apache.parquet.hadoop.ParquetWriter;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.util.HadoopOutputFile;
 import org.apache.parquet.io.OutputFile;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -593,33 +594,30 @@ public class ParquetFileUtils {
 		boolean foundPayloadsAggregatedDir = false;
 		boolean foundPayloadsBulkImportDir = false;
 
+		final Gson gson = new Gson();
 		try {	// Parse the jsonData
-			JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData.
-			JSONObject entityObject = jObj.getJSONObject("FileStatuses");
-			//logger.trace("EntityObject: " + entityObject.toString());	// DEBUG!
+			JsonObject jsonObject = gson.fromJson(jsonResponse, JsonObject.class);
+			JsonArray fileStatuses = jsonObject.get("FileStatuses").getAsJsonObject().get("FileStatus").getAsJsonArray();
 
-			JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus");
-			//logger.trace("directoryStatuses: " + directoryStatuses.toString());	// DEBUG!
-
-			// In case no fileStatuses are found, the follow for-loop will not run.
-			for ( Object fileStatusObject : directoryStatuses ) {
-				JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject;
-				//logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString());	// DEBUG!
-
-				String dirPath = fileStatusJsonObject.getString("pathSuffix");
-				//logger.trace("DirPath: " + dirPath);	// DEBUG!
-
-				if ( dirPath.equals("attempts") )
+			for ( JsonElement fileStatusElement : fileStatuses ) {	// In case no fileStatuses are found, the following for-loop will not run and the "found*" variables will remain "false", thus triggering the creation process.
+				JsonObject fileStatusObject = fileStatusElement.getAsJsonObject();
+				String path = fileStatusObject.get("pathSuffix").getAsString();
+				String type = fileStatusObject.get("type").getAsString();
+				if ( !type.equals("DIRECTORY") ) {
+					logger.warn("Unknown file found: " + path);	// We expect only directories inside this HDFS parent-directory.
+					continue;
+				}
+				if ( path.equals("attempts") )
 					foundAttemptsDir = true;
-				else if ( dirPath.equals("payloads_aggregated") )
+				else if ( path.equals("payloads_aggregated") )
 					foundPayloadsAggregatedDir = true;
-				else if ( dirPath.equals("payloads_bulk_import") )
+				else if ( path.equals("payloads_bulk_import") )
 					foundPayloadsBulkImportDir = true;
-				else if ( ! dirPath.equals("test") )	// The "test" directory helps with testing the service, without interfering with the production directories.
-					logger.warn("Unknown remote parquet HDFS-directory found: " + dirPath);
+				else if ( ! path.equals("test") )	// The "test" directory helps with testing the service, without interfering with the production directories. This subdir will be found only when running in production.
+					logger.warn("Unknown remote parquet HDFS-directory found: " + path);
 			}
-		} catch (JSONException je) {	// In case any of the above "json-keys" was not found.
-			logger.warn("JSON Exception was thrown while trying to retrieve the subdirectories \"attempts\" and \"payloads\": " + je.getMessage() + "\n\nJsonResponse: " + jsonResponse);
+		} catch (Exception e) {	// In case any of the above "json-keys" was not found.
+			logger.error("JSON Exception was thrown while trying to retrieve the \"attempts\" and \"payloads_*\" subdirectories: " + e.getMessage() + "\n\nJsonResponse: " + jsonResponse, e);
 			return false;
 		}
 
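For reference, a minimal, self-contained sketch of the new Gson-based parsing, exercised against a hand-written sample that follows the WebHDFS "LISTSTATUS" response shape ("FileStatuses" -> "FileStatus" array). The class name and the sample JSON are illustrative only and are not part of the service code.

import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

public class GsonListStatusDemo {	// Hypothetical demo class, not part of the service.

	public static void main(String[] args) {
		// Hand-written sample, mimicking the WebHDFS "LISTSTATUS" response structure.
		String jsonResponse = "{\"FileStatuses\":{\"FileStatus\":["
				+ "{\"pathSuffix\":\"attempts\",\"type\":\"DIRECTORY\"},"
				+ "{\"pathSuffix\":\"payloads_aggregated\",\"type\":\"DIRECTORY\"},"
				+ "{\"pathSuffix\":\"stray_file.txt\",\"type\":\"FILE\"}]}}";

		Gson gson = new Gson();
		JsonObject jsonObject = gson.fromJson(jsonResponse, JsonObject.class);
		JsonArray fileStatuses = jsonObject.get("FileStatuses").getAsJsonObject()
				.get("FileStatus").getAsJsonArray();

		for ( JsonElement fileStatusElement : fileStatuses ) {
			JsonObject fileStatusObject = fileStatusElement.getAsJsonObject();
			String path = fileStatusObject.get("pathSuffix").getAsString();
			String type = fileStatusObject.get("type").getAsString();
			System.out.println(type + " -> " + path);	// e.g. "DIRECTORY -> attempts"
		}
	}
}

Note that, unlike org.json (which throws a JSONException for a missing key), Gson's JsonObject.get() returns null, so a missing "FileStatuses" or "FileStatus" key surfaces as a NullPointerException; this is presumably why the catch-block in the diff was widened from JSONException to the generic Exception.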