[enrichment single step] remove parameter from execution
This commit is contained in:
parent
67ce2d54be
commit
82e9e262ee
|
@ -64,7 +64,7 @@ public class SparkResultToProjectThroughSemRelJob {
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
}
|
}
|
||||||
execPropagation(
|
execPropagation(
|
||||||
spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph);
|
spark, outputPath, alreadyLinkedPath, potentialUpdatePath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,24 +72,23 @@ public class SparkResultToProjectThroughSemRelJob {
|
||||||
SparkSession spark,
|
SparkSession spark,
|
||||||
String outputPath,
|
String outputPath,
|
||||||
String alreadyLinkedPath,
|
String alreadyLinkedPath,
|
||||||
String potentialUpdatePath,
|
String potentialUpdatePath) {
|
||||||
Boolean saveGraph) {
|
|
||||||
|
|
||||||
Dataset<ResultProjectSet> toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class);
|
Dataset<ResultProjectSet> toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class);
|
||||||
Dataset<ResultProjectSet> alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class);
|
Dataset<ResultProjectSet> alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class);
|
||||||
|
|
||||||
if (saveGraph) {
|
// if (saveGraph) {
|
||||||
toaddrelations
|
toaddrelations
|
||||||
.joinWith(
|
.joinWith(
|
||||||
alreadyLinked,
|
alreadyLinked,
|
||||||
toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")),
|
toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")),
|
||||||
"left_outer")
|
"left_outer")
|
||||||
.flatMap(mapRelationRn(), Encoders.bean(Relation.class))
|
.flatMap(mapRelationRn(), Encoders.bean(Relation.class))
|
||||||
.write()
|
.write()
|
||||||
.mode(SaveMode.Append)
|
.mode(SaveMode.Append)
|
||||||
.option("compression", "gzip")
|
.option("compression", "gzip")
|
||||||
.json(outputPath);
|
.json(outputPath);
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {
|
private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched
|
sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched
|
||||||
resumeFrom=CommunitySemanticRelation
|
resumeFrom=ResultProject
|
||||||
allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo
|
allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo
|
||||||
allowedsemrelsresultproject=isSupplementedBy;isSupplementTo
|
allowedsemrelsresultproject=isSupplementedBy;isSupplementTo
|
||||||
allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo
|
allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo
|
||||||
|
|
|
@ -97,17 +97,8 @@
|
||||||
<arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
|
<arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="reset_workingDir"/>
|
|
||||||
<error to="Kill"/>
|
|
||||||
</action>
|
|
||||||
<action name="reset_workingDir">
|
|
||||||
<fs>
|
|
||||||
<delete path="${workingDir}"/>
|
|
||||||
<mkdir path="${workingDir}"/>
|
|
||||||
</fs>
|
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
<end name="End"/>
|
|
||||||
|
|
||||||
</workflow-app>
|
</workflow-app>
|
Loading…
Reference in New Issue