diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java index f34caad75a..d7e22e557b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java @@ -64,6 +64,12 @@ public class PayloadConverterJob { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final Boolean validateXML = Optional + .ofNullable(parser.get("validateXML")) + .map(Boolean::valueOf) + .orElse(Boolean.FALSE); + log.info("validateXML: {}", validateXML); + final String contextApiBaseUrl = parser.get("contextApiBaseUrl"); log.info("contextApiBaseUrl: {}", contextApiBaseUrl); @@ -78,18 +84,19 @@ public class PayloadConverterJob { runWithSparkSession(conf, isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - convertToXml( + createPayloads( spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl), - VocabularyGroup.loadVocsFromIS(isLookup)); + VocabularyGroup.loadVocsFromIS(isLookup), validateXML); }); } - private static void convertToXml( + private static void createPayloads( final SparkSession spark, final String inputPath, final String outputPath, final ContextMapper contextMapper, - final VocabularyGroup vocabularies) { + final VocabularyGroup vocabularies, + final Boolean validateXML) { final XmlRecordFactory recordFactory = new XmlRecordFactory( prepareAccumulators(spark.sparkContext()), @@ -110,7 +117,7 @@ public class PayloadConverterJob { .as(Encoders.kryo(JoinedEntity.class)) .map( (MapFunction>) je -> new Tuple2<>( - recordFactory.build(je), + recordFactory.build(je, validateXML), ProvisionModelSupport.transform(je, contextMapper, vocabularies)), Encoders.tuple(Encoders.STRING(), Encoders.bean(SolrRecord.class))) .map( diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_payload_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_payload_converter.json index 4509eb9de4..1b43ca5fd8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_payload_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_payload_converter.json @@ -22,5 +22,11 @@ "paramLongName": "isLookupUrl", "paramDescription": "URL of the context ISLookup Service", "paramRequired": true + }, + { + "paramName": "val", + "paramLongName": "validateXML", + "paramDescription": "should the process check the XML validity", + "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 59058d4677..1682f2ed5b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -13,6 +13,11 @@ contextApiBaseUrl context API URL + + validateXML + should the payload converter validate the XMLs + false + relPartitions number or partitions for the relations Dataset @@ -610,6 +615,7 @@ --inputPath/user/claudio.atzori/data/beta_provision/join_entities --outputPath${workingDir}/xml_json + --validateXML${validateXML} --contextApiBaseUrl${contextApiBaseUrl} --isLookupUrl${isLookupUrl}