diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/config-default.xml index d3fad7dfe..2ba83ba73 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/config-default.xml @@ -2,64 +2,13 @@ - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - spark2YarnHistoryServerAddress - http://hadoop-rm2.garr-pa1.d4science.org:19888 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - - - - + - - - - - - - - - - - - - + @@ -70,6 +19,14 @@ + + + + + + + + @@ -81,4 +38,47 @@ + + + + + jobTracker + hadoop-rm3.garr-pa1.d4science.org:8032 + + + nameNode + hdfs://hadoop-rm1.garr-pa1.d4science.org:8020 + + + hive_metastore_uris + thrift://hadoop-edge3.garr-pa1.d4science.org:9083 + + + spark2YarnHistoryServerAddress + http://hadoop-rm2.garr-pa1.d4science.org:19888 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml index 92205ed96..92af9a8ef 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/bioschema/oozie_app/workflow.xml @@ -17,12 +17,20 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java index ececaf413..8b71520b9 100644 --- a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java +++ b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/bioschema/model/BioSchemaProtein.java @@ -5,6 +5,9 @@ import java.util.List; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; + +import eu.dnetlib.dhp.rdfconverter.utils.CustomPropertyValueDeserializer; @JsonIgnoreProperties(ignoreUnknown = true) public class BioSchemaProtein { @@ -58,6 +61,7 @@ public class BioSchemaProtein { @JsonProperty("https://schema.org/additionalProperty") private List sequenceAnnotation; @JsonProperty("https://schema.org/value") + @JsonDeserialize(using = CustomPropertyValueDeserializer.class) private Link propertyValue; @JsonProperty("https://schema.org/termCode") private String termCode; diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/CustomPropertyValueDeserializer.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/CustomPropertyValueDeserializer.java new file mode 100644 index 000000000..44b582ad3 --- /dev/null +++ 
b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/CustomPropertyValueDeserializer.java
@@ -0,0 +1,58 @@
+
+package eu.dnetlib.dhp.rdfconverter.utils;
+
+import java.io.IOException;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.ObjectCodec;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+
+import eu.dnetlib.dhp.rdfconverter.bioschema.model.BioSchemaProtein;
+
+/**
+ * Jackson deserializer that builds a {@link BioSchemaProtein.Link} from a JSON-LD value.
+ * <p>
+ * The value may be an object carrying an {@code @id} member or a plain scalar; in both
+ * cases the resulting text is stored as the id of the returned link.
+ */
+public class CustomPropertyValueDeserializer
+	extends StdDeserializer<BioSchemaProtein.Link> {
+
+	/** Creates a deserializer bound to {@link BioSchemaProtein.Link}. */
+	public CustomPropertyValueDeserializer() {
+		this(BioSchemaProtein.Link.class);
+	}
+
+	/**
+	 * @param vc the value class handed to the {@link StdDeserializer} base class
+	 */
+	public CustomPropertyValueDeserializer(Class<?> vc) {
+		super(vc);
+	}
+
+	/**
+	 * Reads the current value as a tree and wraps its {@code @id} member, or its own text
+	 * when that member is absent or JSON null, into a new link.
+	 *
+	 * @return the populated link, or {@code null} when the parser yields no content
+	 * @throws IOException if the underlying parser fails to read the value
+	 */
+	@Override
+	public BioSchemaProtein.Link deserialize(
+		JsonParser jsonparser, DeserializationContext context)
+		throws IOException {
+		ObjectCodec codec = jsonparser.getCodec();
+		JsonNode node = codec.readTree(jsonparser);
+		if (node == null) {
+			return null;
+		}
+		JsonNode id = node.get("@id");
+		// An explicit JSON null would otherwise be rendered as the literal text "null".
+		JsonNode source = (id != null && !id.isNull()) ? id : node;
+		BioSchemaProtein.Link link = new BioSchemaProtein.Link();
+		link.setId(source.asText());
+		return link;
+	}
+}
 diff --git a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java index 9172ff087..f284d49df 100644 --- a/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java +++ b/dhp-workflows/dhp-rdfconverter/src/main/java/eu/dnetlib/dhp/rdfconverter/utils/RDFConverter.java @@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; import
com.github.jsonldjava.core.JsonLdOptions; import com.github.jsonldjava.core.JsonLdProcessor; import com.github.jsonldjava.utils.JsonUtils; @@ -50,6 +51,8 @@ public class RDFConverter { objectMapper.enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY); objectMapper.enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT); objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + objectMapper.configure(DeserializationFeature.FAIL_ON_INVALID_SUBTYPE, false); + objectMapper.configure(DeserializationFeature.FAIL_ON_MISSING_CREATOR_PROPERTIES, false); BioSchemaProtein bioSchemaProtein = objectMapper.readValue(compactContent, BioSchemaProtein.class); BioSchemaProtein.DateTimeType retrievedOnType = bioSchemaProtein.getRetrievedOn(); diff --git a/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java index a203dcd40..15cda0118 100644 --- a/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java +++ b/dhp-workflows/dhp-rdfconverter/src/test/java/eu/dnetlib/dhp/rdfconverter/bioschema/ConverterTest.java @@ -15,17 +15,21 @@ public class ConverterTest { static Logger logger = LoggerFactory.getLogger(ConverterTest.class); -// @Test -// private void nqToDataciteTest() throws Exception { -// InputStream is = ConverterTest.class.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq"); -// String nq = IOUtils.toString(is); -// logger.debug("NQ: " + nq); -// RDFConverter converter = new RDFConverter(); -// ArrayList results = converter.nQuadsFile2DataciteJson(nq, "Protein"); -// if (results != null && !results.isEmpty()) { -// logger.info("JSON DATACITE: " + results.get(0)); -// } -// }
+	/**
+	 * Converts the bundled {@code disprot.nq} N-Quads resource with the "Protein" profile
+	 * and logs every record returned by the converter.
+	 */
+	@Test
+	public void disprotToDataciteTest() throws Exception {
+		String nq;
+		// Close the classpath resource even when reading it fails.
+		try (InputStream is = ConverterTest.class
+			.getResourceAsStream("/eu/dnetlib/dhp/rdfconverter/bioschema/disprot.nq")) {
+			nq = IOUtils.toString(is, java.nio.charset.StandardCharsets.UTF_8);
+		}
+		RDFConverter converter = new RDFConverter();
+		ArrayList<String> results = converter.nQuadsFile2DataciteJson(nq, "Protein");
+		results.forEach(r -> logger.info("JSON DATACITE >> {}", r));
+	}
 @Test public void pedCitationTest() throws Exception {