- Replace all "json" usages with "gson" ones, in order to remove the "org.json:json" dependency.

- Add an extra check to verify that the remote parquet directories are indeed directories.
- Set new version.
This commit is contained in:
Lampros Smyrnaios 2024-06-06 14:40:39 +03:00
parent 9610b77b2b
commit ed7bf09f9b
2 changed files with 23 additions and 28 deletions

View File

@ -6,7 +6,7 @@ plugins {
java { java {
group = 'eu.openaire.urls_controller' group = 'eu.openaire.urls_controller'
version = '2.7.3' version = '2.7.4-SNAPSHOT'
sourceCompatibility = JavaVersion.VERSION_1_8 sourceCompatibility = JavaVersion.VERSION_1_8
} }
@ -112,9 +112,6 @@ dependencies {
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8. implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
implementation 'com.fasterxml.woodstox:woodstox-core:6.6.2' implementation 'com.fasterxml.woodstox:woodstox-core:6.6.2'
// https://mvnrepository.com/artifact/org.json/json
implementation 'org.json:json:20240303' // This is used only in "ParquetFileUtils.createRemoteParquetDirectories()". TODO - Replace it with "gson".
// https://mvnrepository.com/artifact/com.google.code.gson/gson // https://mvnrepository.com/artifact/com.google.code.gson/gson
implementation 'com.google.code.gson:gson:2.11.0' implementation 'com.google.code.gson:gson:2.11.0'

View File

@ -1,6 +1,10 @@
package eu.openaire.urls_controller.util; package eu.openaire.urls_controller.util;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import eu.openaire.urls_controller.configuration.DatabaseConnector; import eu.openaire.urls_controller.configuration.DatabaseConnector;
import eu.openaire.urls_controller.models.Error; import eu.openaire.urls_controller.models.Error;
import eu.openaire.urls_controller.models.*; import eu.openaire.urls_controller.models.*;
@ -15,9 +19,6 @@ import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.util.HadoopOutputFile; import org.apache.parquet.hadoop.util.HadoopOutputFile;
import org.apache.parquet.io.OutputFile; import org.apache.parquet.io.OutputFile;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -593,33 +594,30 @@ public class ParquetFileUtils {
boolean foundPayloadsAggregatedDir = false; boolean foundPayloadsAggregatedDir = false;
boolean foundPayloadsBulkImportDir = false; boolean foundPayloadsBulkImportDir = false;
final Gson gson = new Gson();
try { // Parse the jsonData try { // Parse the jsonData
JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData. JsonObject jsonObject = gson.fromJson(jsonResponse, JsonObject.class);
JSONObject entityObject = jObj.getJSONObject("FileStatuses"); JsonArray fileStatuses = jsonObject.get("FileStatuses").getAsJsonObject().get("FileStatus").getAsJsonArray();
//logger.trace("EntityObject: " + entityObject.toString()); // DEBUG!
JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus"); for ( JsonElement fileStatusElement : fileStatuses ) { // In case no fileStatuses are found, the follow for-loop will not run and the "found*" variables will remain "false", thus triggering the creation process.
//logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG! JsonObject fileStatusObject = fileStatusElement.getAsJsonObject();
String path = fileStatusObject.get("pathSuffix").getAsString();
// In case no fileStatuses are found, the follow for-loop will not run. String type = fileStatusObject.get("type").getAsString();
for ( Object fileStatusObject : directoryStatuses ) { if ( !type.equals("DIRECTORY") ) {
JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject; logger.warn("Unknown file found: " + path); // We
//logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG! continue;
String dirPath = fileStatusJsonObject.getString("pathSuffix");
//logger.trace("DirPath: " + dirPath); // DEBUG!
if ( dirPath.equals("attempts") )
foundAttemptsDir = true;
else if ( dirPath.equals("payloads_aggregated") )
foundPayloadsAggregatedDir = true;
else if ( dirPath.equals("payloads_bulk_import") )
foundPayloadsBulkImportDir = true;
else if ( ! dirPath.equals("test") ) // The "test" directory helps with testing the service, without interfering with the production directories.
logger.warn("Unknown remote parquet HDFS-directory found: " + dirPath);
} }
} catch (JSONException je) { // In case any of the above "json-keys" was not found. if ( path.equals("attempts") )
logger.warn("JSON Exception was thrown while trying to retrieve the subdirectories \"attempts\" and \"payloads\": " + je.getMessage() + "\n\nJsonResponse: " + jsonResponse); foundAttemptsDir = true;
else if ( path.equals("payloads_aggregated") )
foundPayloadsAggregatedDir = true;
else if ( path.equals("payloads_bulk_import") )
foundPayloadsBulkImportDir = true;
else if ( ! path.equals("test") ) // The "test" directory helps with testing the service, without interfering with the production directories. This subdir will be found only when running in production.
logger.warn("Unknown remote parquet HDFS-directory found: " + path);
}
} catch (Exception e) { // In case any of the above "json-keys" was not found.
logger.error("JSON Exception was thrown while trying to retrieve the \"attempts\" and \"payloads_*\" subdirectories: " + e.getMessage() + "\n\nJsonResponse: " + jsonResponse, e);
return false; return false;
} }