dump of the results related to at least one project #61

Merged
claudio.atzori merged 51 commits from miriam.baglioni/dnet-hadoop:dump into master 2020-12-09 17:22:57 +01:00
1 changed files with 4 additions and 4 deletions
Showing only changes of commit 3319440c53 - Show all commits

View File

@ -76,16 +76,16 @@ public class SparkPrepareResultProject implements Serializable {
private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) { private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
Dataset<Relation> relation = Utils Dataset<Relation> relation = Utils
.readPath(spark, inputPath + "/relation", Relation.class) .readPath(spark, inputPath + "/relation", Relation.class)
.filter("dataInfo.deletedbyinference = false and relClass = 'produces'"); .filter("dataInfo.deletedbyinference = false and lower(relClass) = 'isproducedby'");
Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils Dataset<eu.dnetlib.dhp.schema.oaf.Project> projects = Utils
.readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class); .readPath(spark, inputPath + "/project", eu.dnetlib.dhp.schema.oaf.Project.class);
projects projects
.joinWith(relation, projects.col("id").equalTo(relation.col("source"))) .joinWith(relation, projects.col("id").equalTo(relation.col("target")), "inner")
.groupByKey( .groupByKey(
(MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, String>) value -> value (MapFunction<Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, String>) value -> value
._2() ._2()
.getTarget(), .getSource(),
Encoders.STRING()) Encoders.STRING())
.mapGroups( .mapGroups(
(MapGroupsFunction<String, Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, ResultProject>) (s, (MapGroupsFunction<String, Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation>, ResultProject>) (s,
@ -93,7 +93,7 @@ public class SparkPrepareResultProject implements Serializable {
Set<String> projectSet = new HashSet<>(); Set<String> projectSet = new HashSet<>();
Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next(); Tuple2<eu.dnetlib.dhp.schema.oaf.Project, Relation> first = it.next();
ResultProject rp = new ResultProject(); ResultProject rp = new ResultProject();
rp.setResultId(first._2().getTarget()); rp.setResultId(s);
eu.dnetlib.dhp.schema.oaf.Project p = first._1(); eu.dnetlib.dhp.schema.oaf.Project p = first._1();
projectSet.add(p.getId()); projectSet.add(p.getId());
Project ps = getProject(p); Project ps = getProject(p);