forked from lsmyrnaios/UrlsController
- Replace all "json" usages, with "gson" ones, in order to remove the "org.json:json" dependency.
- Add an extra check to verify that the remote parquet directories are directories indeed. - Set new version.
This commit is contained in:
parent
9610b77b2b
commit
ed7bf09f9b
|
@ -6,7 +6,7 @@ plugins {
|
|||
|
||||
java {
|
||||
group = 'eu.openaire.urls_controller'
|
||||
version = '2.7.3'
|
||||
version = '2.7.4-SNAPSHOT'
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
}
|
||||
|
||||
|
@ -112,9 +112,6 @@ dependencies {
|
|||
implementation 'org.apache.thrift:libthrift:0.17.0' // Newer versions (>=0.18.X) are not compatible with JAVA 8.
|
||||
implementation 'com.fasterxml.woodstox:woodstox-core:6.6.2'
|
||||
|
||||
// https://mvnrepository.com/artifact/org.json/json
|
||||
implementation 'org.json:json:20240303' // This is used only in "ParquetFileUtils.createRemoteParquetDirectories()". TODO - Replace it with "gson".
|
||||
|
||||
// https://mvnrepository.com/artifact/com.google.code.gson/gson
|
||||
implementation 'com.google.code.gson:gson:2.11.0'
|
||||
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
package eu.openaire.urls_controller.util;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.JsonArray;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import eu.openaire.urls_controller.configuration.DatabaseConnector;
|
||||
import eu.openaire.urls_controller.models.Error;
|
||||
import eu.openaire.urls_controller.models.*;
|
||||
|
@ -15,9 +19,6 @@ import org.apache.parquet.hadoop.ParquetWriter;
|
|||
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
|
||||
import org.apache.parquet.hadoop.util.HadoopOutputFile;
|
||||
import org.apache.parquet.io.OutputFile;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
@ -593,33 +594,30 @@ public class ParquetFileUtils {
|
|||
boolean foundPayloadsAggregatedDir = false;
|
||||
boolean foundPayloadsBulkImportDir = false;
|
||||
|
||||
final Gson gson = new Gson();
|
||||
try { // Parse the jsonData
|
||||
JSONObject jObj = new JSONObject(jsonResponse); // Construct a JSONObject from the retrieved jsonData.
|
||||
JSONObject entityObject = jObj.getJSONObject("FileStatuses");
|
||||
//logger.trace("EntityObject: " + entityObject.toString()); // DEBUG!
|
||||
JsonObject jsonObject = gson.fromJson(jsonResponse, JsonObject.class);
|
||||
JsonArray fileStatuses = jsonObject.get("FileStatuses").getAsJsonObject().get("FileStatus").getAsJsonArray();
|
||||
|
||||
JSONArray directoryStatuses = entityObject.getJSONArray("FileStatus");
|
||||
//logger.trace("directoryStatuses: " + directoryStatuses.toString()); // DEBUG!
|
||||
|
||||
// In case no fileStatuses are found, the following for-loop will not run.
|
||||
for ( Object fileStatusObject : directoryStatuses ) {
|
||||
JSONObject fileStatusJsonObject = (JSONObject) fileStatusObject;
|
||||
//logger.trace("FileStatusJsonObject: " + fileStatusJsonObject.toString()); // DEBUG!
|
||||
|
||||
String dirPath = fileStatusJsonObject.getString("pathSuffix");
|
||||
//logger.trace("DirPath: " + dirPath); // DEBUG!
|
||||
|
||||
if ( dirPath.equals("attempts") )
|
||||
foundAttemptsDir = true;
|
||||
else if ( dirPath.equals("payloads_aggregated") )
|
||||
foundPayloadsAggregatedDir = true;
|
||||
else if ( dirPath.equals("payloads_bulk_import") )
|
||||
foundPayloadsBulkImportDir = true;
|
||||
else if ( ! dirPath.equals("test") ) // The "test" directory helps with testing the service, without interfering with the production directories.
|
||||
logger.warn("Unknown remote parquet HDFS-directory found: " + dirPath);
|
||||
for ( JsonElement fileStatusElement : fileStatuses ) { // In case no fileStatuses are found, the following for-loop will not run and the "found*" variables will remain "false", thus triggering the creation process.
|
||||
JsonObject fileStatusObject = fileStatusElement.getAsJsonObject();
|
||||
String path = fileStatusObject.get("pathSuffix").getAsString();
|
||||
String type = fileStatusObject.get("type").getAsString();
|
||||
if ( !type.equals("DIRECTORY") ) {
|
||||
logger.warn("Unknown file found: " + path); // We expect only directories at this level — NOTE(review): original comment was truncated here; confirm intended wording.
|
||||
continue;
|
||||
}
|
||||
} catch (JSONException je) { // In case any of the above "json-keys" was not found.
|
||||
logger.warn("JSON Exception was thrown while trying to retrieve the subdirectories \"attempts\" and \"payloads\": " + je.getMessage() + "\n\nJsonResponse: " + jsonResponse);
|
||||
if ( path.equals("attempts") )
|
||||
foundAttemptsDir = true;
|
||||
else if ( path.equals("payloads_aggregated") )
|
||||
foundPayloadsAggregatedDir = true;
|
||||
else if ( path.equals("payloads_bulk_import") )
|
||||
foundPayloadsBulkImportDir = true;
|
||||
else if ( ! path.equals("test") ) // The "test" directory helps with testing the service, without interfering with the production directories. This subdir will be found only when running in production.
|
||||
logger.warn("Unknown remote parquet HDFS-directory found: " + path);
|
||||
}
|
||||
} catch (Exception e) { // In case any of the above "json-keys" was not found.
|
||||
logger.error("JSON Exception was thrown while trying to retrieve the \"attempts\" and \"payloads_*\" subdirectories: " + e.getMessage() + "\n\nJsonResponse: " + jsonResponse, e);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue