1
0
Fork 0

[enrichment single step] remove parameter from execution

This commit is contained in:
Miriam Baglioni 2024-01-17 17:38:03 +01:00
parent 67ce2d54be
commit 82e9e262ee
3 changed files with 15 additions and 25 deletions

View File

@ -64,7 +64,7 @@ public class SparkResultToProjectThroughSemRelJob {
removeOutputDir(spark, outputPath);
}
execPropagation(
spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph);
spark, outputPath, alreadyLinkedPath, potentialUpdatePath);
});
}
@ -72,24 +72,23 @@ public class SparkResultToProjectThroughSemRelJob {
SparkSession spark,
String outputPath,
String alreadyLinkedPath,
String potentialUpdatePath,
Boolean saveGraph) {
String potentialUpdatePath) {
Dataset<ResultProjectSet> toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class);
Dataset<ResultProjectSet> alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class);
if (saveGraph) {
toaddrelations
.joinWith(
alreadyLinked,
toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")),
"left_outer")
.flatMap(mapRelationRn(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath);
}
// if (saveGraph) {
toaddrelations
.joinWith(
alreadyLinked,
toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")),
"left_outer")
.flatMap(mapRelationRn(), Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(outputPath);
// }
}
private static FlatMapFunction<Tuple2<ResultProjectSet, ResultProjectSet>, Relation> mapRelationRn() {

View File

@ -1,5 +1,5 @@
sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched
resumeFrom=CommunitySemanticRelation
resumeFrom=ResultProject
allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo
allowedsemrelsresultproject=isSupplementedBy;isSupplementTo
allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo

View File

@ -97,17 +97,8 @@
<arg>--potentialUpdatePath</arg><arg>${workingDir}/resultproject/preparedInfo/potentialUpdates</arg>
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/resultproject/preparedInfo/alreadyLinked</arg>
</spark>
<ok to="reset_workingDir"/>
<error to="Kill"/>
</action>
<action name="reset_workingDir">
<fs>
<delete path="${workingDir}"/>
<mkdir path="${workingDir}"/>
</fs>
<ok to="End"/>
<error to="Kill"/>
</action>
<end name="End"/>
</workflow-app>