From 0f8e2ecce6b8e55942bb56de4f4fdae462f25129 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Fri, 29 Jan 2021 10:45:07 +0100 Subject: [PATCH] Merged Datacite transfrom into this branch --- dhp-workflows/dhp-aggregation/pom.xml | 14 ++++-------- .../DataciteToOAFTransformation.scala | 14 +++++++++--- .../datacite/ImportDatacite.scala | 1 + .../datacite/oozie_app/workflow.xml | 22 ++++++++++++++++++- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 0445e0e1b..b61c3d443 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -37,7 +37,7 @@ - + @@ -58,15 +58,9 @@ eu.dnetlib.dhp dhp-common ${project.version} - - - com.sun.xml.bind - jaxb-core - - - - + + eu.dnetlib.dhp dhp-schemas ${project.version} @@ -116,4 +110,4 @@ - + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala index 9418e71da..933f1445f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/DataciteToOAFTransformation.scala @@ -11,9 +11,10 @@ import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse import java.nio.charset.CodingErrorAction +import java.text.SimpleDateFormat import java.time.LocalDate import java.time.format.DateTimeFormatter -import java.util.Locale +import java.util.{Date, Locale} import java.util.regex.Pattern import scala.collection.JavaConverters._ import scala.io.{Codec, Source} @@ -44,6 +45,8 @@ object DataciteToOAFTransformation { codec.onMalformedInput(CodingErrorAction.REPLACE) codec.onUnmappableCharacter(CodingErrorAction.REPLACE) + + private val PID_VOCABULARY = "dnet:pid_types" val COBJ_VOCABULARY = "dnet:publication_resource" val RESULT_VOCABULARY = "dnet:result_typologies" @@ -298,8 +301,13 @@ object DataciteToOAFTransformation { result.setPid(List(pid).asJava) result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) result.setOriginalId(List(doi).asJava) - result.setDateofcollection(s"${dateOfCollection}") - result.setDateoftransformation(s"$ts") + + val d = new Date(dateOfCollection*1000) + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + + + result.setDateofcollection(ISO8601FORMAT.format(d)) + result.setDateoftransformation(ISO8601FORMAT.format(ts)) result.setDataInfo(dataInfo) val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala index 06fcbb518..d5edb674a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/ImportDatacite.scala @@ -108,6 +108,7 @@ object ImportDatacite { val cnt = writeSequenceFile(hdfsTargetPath, ts, conf) + log.info(s"Imported from Datacite API $cnt documents") if (cnt > 0) { diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index a3caa5e23..047794c9c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -13,13 +13,25 @@ nativeInputPath the path of the input MDStore + + - + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('resumeFrom') eq 'TransformJob'} + ${wf:conf('resumeFrom') eq 'ExportDataset'} + + + + yarn-cluster @@ -69,6 +81,14 @@ -tr${isLookupUrl} --masteryarn-cluster + + + + + + + +