From 1265dadc90686ad9a687a8bf7e20fd739d663ae9 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Thu, 20 May 2021 19:01:28 +0200 Subject: [PATCH] workflow aligned with stable_ids --- .../orcid/SparkDownloadOrcidAuthors.java | 2 +- .../orcidnodoi/oaf/PublicationToOaf.java | 4 +- .../oozie_app/workflow.xml | 42 ------------------- .../orcidnodoi/oozie_app/workflow.xml | 31 ++++++++++---- 4 files changed, 27 insertions(+), 52 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 36b4b073d..8cf070213 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -62,7 +62,7 @@ public class SparkDownloadOrcidAuthors { isSparkSessionManaged, spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); - logger.info("lastUpdate: ", lastUpdate); + logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { throw new RuntimeException("last update info not found"); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 5c3236222..ccae4d976 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -174,7 +174,9 @@ public class PublicationToOaf implements Serializable { publication .getExternalReference() .add( - convertExtRef(extId, classid, classname, "dnet:pid_types", "dnet:pid_types")); + convertExtRef( + extId, classid, classname, ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES)); } }); diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml deleted file mode 100644 index becdf0974..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/import_orcid_no_doi/oozie_app/workflow.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - inputPath - /data/orcid_activities_2020/no_doi_dataset - path where retrieve the already generated action set - - - outputPath - /data/orcid_activities_2020/test_import_orcid_no_doi - path where to store the action set - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${inputPath}/* - ${outputPath} - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml index 6513ff7e1..365c4d5b4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/orcidnodoi/oozie_app/workflow.xml @@ -1,5 +1,15 @@ + + workingPath + /data/orcid_activities_2020 + path where the collection workflow stores the ORCID data + + + outputPath + path where to store the action set + + spark2GenNoDoiDatasetMaxExecutors 40 @@ -35,10 +45,6 @@ spark2EventLogDir spark 2.* event log dir location - - workingPath - the working dir base path - @@ -83,11 +89,20 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - -w${workingPath}/ - -n${nameNode} - -ilast_orcid_dataset - -oewno_doi_dataset + --workingPath${workingPath}/ + --hdfsServerUri${nameNode} + --orcidDataFolderlast_orcid_dataset + --outputEnrichedWorksPathno_doi_dataset + + + + + + + ${workingPath}/no_doi_dataset/* + ${outputPath} +