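[orcidenrichment] fixing issue: add explicit "/" separators when building graph/target paths (dropping the trailing slashes from the workflow args) and call initialize() before run()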
This commit is contained in:
parent
e4f89f9800
commit
227e84be99
|
@ -33,7 +33,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
|
|||
|
||||
// Create instance and run the Spark application
|
||||
SparkPropagateOrcidAuthor app = new SparkPropagateOrcidAuthor("/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json", args, log);
|
||||
app.run();
|
||||
app.initialize().run();
|
||||
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
|
|||
.keySet().stream().filter(ModelSupport::isResult)
|
||||
.forEach(e -> {
|
||||
Dataset<Row> orcidDnet = spark.read().schema(Encoders.bean(Result.class).schema())
|
||||
.json(graphPath + e.name())
|
||||
.json(graphPath + "/"+ e.name())
|
||||
.as(Encoders.bean(Result.class))
|
||||
.filter((FilterFunction<Result>) r -> r.getAuthor().stream()
|
||||
.anyMatch(a -> a.getPid()
|
||||
|
@ -80,13 +80,13 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
|
|||
.selectExpr("_1 as target", "_2 as orcid_authors");
|
||||
|
||||
Dataset<Row> result = spark.read().schema(Encoders.bean(Result.class).schema())
|
||||
.json(graphPath + e.name())
|
||||
.json(graphPath + "/"+ e.name())
|
||||
.as(Encoders.bean(Result.class))
|
||||
.selectExpr("id", "author as graph_authors");
|
||||
|
||||
Dataset<Row> supplements = spark.read()
|
||||
.schema(Encoders.bean(Relation.class).schema())
|
||||
.json(graphPath + "relation")
|
||||
.json(graphPath + "/"+ "relation")
|
||||
.where("relclass IN('" + ModelConstants.IS_SUPPLEMENT_TO + "', '" +
|
||||
ModelConstants.IS_SUPPLEMENTED_BY + "')")
|
||||
.selectExpr("source as id", "target");
|
||||
|
@ -98,7 +98,7 @@ public class SparkPropagateOrcidAuthor extends SparkEnrichWithOrcidAuthors {
|
|||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.parquet(targetPath + e.name() + "_unmatched");
|
||||
.parquet(targetPath + "/"+ e.name() + "_unmatched");
|
||||
|
||||
});
|
||||
}
|
||||
|
|
|
@ -112,10 +112,10 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${sourcePath}/</arg>
|
||||
<arg>--orcidPath</arg><arg>${sourcePath}/</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}/</arg>
|
||||
<arg>--targetPath</arg><arg>${outputPath}/</arg>
|
||||
<arg>--graphPath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--orcidPath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
<arg>--targetPath</arg><arg>${outputPath}</arg>
|
||||
<arg>--matchingSource</arg><arg>graph</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
|
|
Loading…
Reference in New Issue