diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/authorpids/MakeReportSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/authorpids/MakeReportSparkJob.java index 3193659f2..0db8ff62b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/authorpids/MakeReportSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/authorpids/MakeReportSparkJob.java @@ -98,7 +98,7 @@ public class MakeReportSparkJob implements Serializable { .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String outputPath = parser.get("reportOutputPath"); + String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); String preparedInfoPath = parser.get("preparedInfoPath"); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml index a3728331e..1a193c333 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean/oozie_app/workflow.xml @@ -311,12 +311,12 @@ - ${wf:conf('emend') eq true} + ${wf:conf('emend') eq true} - + @@ -343,7 +343,6 @@ --inputPath${workingDir}/cleaned/publication --outputPath${workingDir}/prepared/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - @@ -369,7 +368,6 @@ --inputPath${workingDir}/cleaned/dataset --outputPath${workingDir}/prepared/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset - @@ -395,7 +393,6 @@ --inputPath${workingDir}/cleaned/software --outputPath${workingDir}/prepared/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software - @@ -421,21 +418,13 @@ --inputPath${workingDir}/cleaned/otherresearchproduct --outputPath${workingDir}/prepared/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - - + - - - ${wf:conf('clean') eq false} - ${wf:conf('clean') eq true} - - - @@ -461,12 +450,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --inputPath${workingDir}/cleaned/publication --outputPath${workingDir}/report/publication - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication --preparedInfoPath${workingDir}/prepared/publication - --orcidInputPath${orcidInputPath} --whitelist${whitelist} + --orcidInputPath${orcidInputPath} @@ -491,8 +478,6 @@ --preparedInfoPath${workingDir}/prepared/dataset --outputPath${workingDir}/report/dataset - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset - --inputPath${workingDir}/cleaned/dataset --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -518,9 +503,7 @@ --conf spark.sql.shuffle.partitions=7680 --preparedInfoPath${workingDir}/prepared/otherresearchproduct - --outputPath$workingDir}/report/otherresearchproduct - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --inputPath${workingDir}/cleaned/otherresearchproduct + --outputPath${workingDir}/report/otherresearchproduct --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -546,9 +529,7 @@ --conf spark.sql.shuffle.partitions=7680 --preparedInfoPath${workingDir}/prepared/software - --outputPath$workingDir}/report/software - --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software - --inputPath${workingDir}/cleaned/software + --outputPath${workingDir}/report/software --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -586,8 +567,7 @@ --inputPath${workingDir}/cleaned/publication --outputPath${graphOutputPath}/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - --preparedInfoPath${workingDir}/prepared/publication - --orcidInputPath${orcidInputPath} + --reportPath${workingDir}/report/publication @@ -610,11 +590,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --preparedInfoPath${workingDir}/prepared/dataset + --reportPath${workingDir}/report/dataset --outputPath${graphOutputPath}/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset --inputPath${workingDir}/cleaned/dataset - --orcidInputPath${orcidInputPath} @@ -637,11 +616,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --preparedInfoPath${workingDir}/prepared/otherresearchproduct + --reportPath${workingDir}/report/otherresearchproduct --outputPath${graphOutputPath}/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --inputPath${workingDir}/cleaned/otherresearchproduct - --orcidInputPath${orcidInputPath} @@ -664,11 +642,10 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --preparedInfoPath${workingDir}/prepared/software + --reportPath${workingDir}/report/software --outputPath${graphOutputPath}/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software --inputPath${workingDir}/cleaned/software - --orcidInputPath${orcidInputPath} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean_orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean_orcid/oozie_app/workflow.xml index 225c1903b..dfcc45e41 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean_orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/clean_orcid/oozie_app/workflow.xml @@ -222,7 +222,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --reportOutputPath${reportOutputPath}/publication + --outputPath${workingDir}/report/publication --preparedInfoPath${workingDir}/publication --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -249,7 +249,7 @@ --conf spark.sql.shuffle.partitions=7680 --preparedInfoPath${workingDir}/dataset - --reportOutputPath${reportOutputPath}/dataset + --outputPath${workingDir}/report/dataset --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -275,7 +275,7 @@ --conf spark.sql.shuffle.partitions=7680 --preparedInfoPath${workingDir}/otherresearchproduct - --reportOutputPath${reportOutputPath}/otherresearchproduct + --outputPath${workingDir}/report/otherresearchproduct --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -301,7 +301,7 @@ --conf spark.sql.shuffle.partitions=7680 --preparedInfoPath${workingDir}/software - --reportOutputPath${reportOutputPath}/software + --outputPath${workingDir}/report/software --orcidInputPath${orcidInputPath} --whitelist${whitelist} @@ -338,7 +338,7 @@ --inputPath${inputPath}/publication --outputPath${outputPath}/publication --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Publication - --reportPath${reportOutputPath}/publication + --reportPath${workingDir}/report/publication @@ -361,7 +361,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --reportPath${reportOutputPath}/dataset + --reportPath${workingDir}/report/dataset --outputPath${outputPath}/dataset --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Dataset --inputPath${inputPath}/dataset @@ -387,7 +387,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --reportPath${reportOutputPath}/otherresearchproduct + --reportPath${workingDir}/report/otherresearchproduct --outputPath${outputPath}/otherresearchproduct --graphTableClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --inputPath${inputPath}/otherresearchproduct @@ -413,7 +413,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.sql.shuffle.partitions=7680 - --reportPath${reportOutputPath}/software + --reportPath${workingDir}/report/software --outputPath${outputPath}/software --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Software --inputPath${inputPath}/software diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/wrongassociation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/wrongassociation.json deleted file mode 100644 index 8b6311641..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/wrongassociation.json +++ /dev/null @@ -1,35 +0,0 @@ -{"orcid":"Alex Bullock" ,"result": "Gillian Farnie"} -{"orcid": "Luís Rocha", "result":"Pedro Relvas"} -{"orcid": "Prasanth Manohar", "result": "Nachimuthu Ramesh"} -{"orcid": "Zhiying Lin", "result":"Guanglong Huang"} -{"orcid":"Andrew Golnar","result":"Kim Pepin"} -{"orcid": "Gilles Marcou", "result":"Filippo Lunghini"} -{"orcid": "Philip Hahn", "result":"John Maron"} -{"orcid": "Kirsty Gibson", "result":"Kim R. Hardie"} -{"orcid": "Paula Lago", "result":"Shingo Takeda"} -{"orcid": "Paul Seidler", "result":"Dalziel J. Wilson"} -{"orcid": "Solomon Okunade", "result":"Rufus Adebayo Ajisafe"} -{"orcid": "Emi Arai", "result":"Masaru Hasegawa"} -{"orcid": "Dr Muhammad Yameen Sandhu", "result":"Nutapong Somjit"} -{"orcid": "Xianlei Cai", "result":"Weiming Yu"} -{"orcid": "Bing He", "result":"Chuan Xing"} -{"orcid": "JULIEN COURCHET", "result":"Franck Polleux"} -{"orcid": "Xiaoyun Pan", "result":"Liru Chen"} -{"orcid": "Marianne Okal", "result":"Brendan Hodge"} -{"orcid": "Michal Fereczkowski", "result":"Silje Grini Nielsen"} -{"orcid": "Nobuyuki Nakai", "result":"Tadafumi Kurogi"} -{"orcid": "Colin Daniel", "result":"Christine Cuyler"} -{"orcid": "Xavier Arnan", "result":"Anna Torné-Noguera"} -{"orcid": "Denita Hadziabdic", "result":"Meher Ony"} -{"orcid": "Kor de Jong", "result":"K. Koning"} -{"orcid": "Chaya Patel", "result":"David Leib"} -{"orcid": "Fagner Carniel", "result":"Adonai Lacruz"} -{"orcid": "Carrie Peltz", "result":"Erica Kornblith"} -{"orcid": "Kathryn Huyvaert", "result":"Larissa L. Bailey"} -{"orcid": "Christine Provost", "result":"Nathalie Sennéchael"} -{"orcid": "Nancy Pachana", "result":"Lisa DiNatale"} -{"orcid": "ARDESHIR BAYAT", "result":"P. Marcos Gorresen"} -{"orcid": "Paul Berkowitz", "result":"Silje Grini Nielsen"} -{"orcid": "Alice Laciny", "result":"Brian Metscher"} -{"orcid": "Octavio Rojas", "result":"Josie A. Griffin"} -{"orcid": "Carlo Sandroni", "result":"Riccardo Scattolini"}