From 6933ec11fbf5fecdacbad4f3782c2316a322ea69 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 25 Jun 2020 11:04:12 +0200 Subject: [PATCH] WIP: prepare relation job --- .../dhp/oa/provision/PrepareRelationsJob.java | 8 ++- .../provision/model/SortableRelationKey.java | 3 +- .../oa/provision/SortableRelationKeyTest.java | 53 ++++++++++--------- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index 4ae822df7f..cf311c690c 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -7,6 +7,8 @@ import java.io.Serializable; import java.util.*; import java.util.function.Supplier; +import javax.annotation.Nullable; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; @@ -21,6 +23,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Predicate; import com.google.common.base.Splitter; import com.google.common.collect.ComparisonChain; import com.google.common.collect.Iterables; @@ -142,7 +145,7 @@ public class PrepareRelationsJob { .repartitionAndSortWithinPartitions(new RelationPartitioner(relPartitions)) .groupBy(Tuple2::_1) .map(Tuple2::_2) - .map(t -> Iterables.limit(t, maxRelations)) + .map(t -> Iterables.filter(t, input -> input._1().getSubRelType().equals("outcome"))) .flatMap(Iterable::iterator) .map(Tuple2::_2) @@ -151,7 +154,8 @@ public class PrepareRelationsJob { .repartitionAndSortWithinPartitions(new RelationPartitioner(relPartitions)) .groupBy(Tuple2::_1) .map(Tuple2::_2) - .map(t -> Iterables.limit(t, maxRelations)) + .map(t -> Iterables.filter(t, input -> input._1().getSubRelType().equals("outcome"))) + // .map(t -> Iterables.limit(t, maxRelations)) .flatMap(Iterable::iterator) .map(Tuple2::_2) .rdd(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java index e96c4ca5cf..09a1a9d333 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java @@ -62,8 +62,9 @@ public class SortableRelationKey implements Comparable, Ser public int compareTo(SortableRelationKey o) { return ComparisonChain .start() + .compare(getGroupingKey(), o.getGroupingKey()) .compare(getWeight(this), getWeight(o)) - .result() * -1; + .result(); } private Integer getWeight(SortableRelationKey o) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java index 200800bd89..72f28fdf2d 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java @@ -1,41 +1,42 @@ + package eu.dnetlib.dhp.oa.provision; +import java.io.IOException; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.oa.provision.model.SortableRelationKey; import eu.dnetlib.dhp.schema.oaf.Relation; -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.util.List; public class SortableRelationKeyTest { - @Test - public void doTesSorting() throws IOException { - final ObjectMapper mapper = new ObjectMapper(); - final String json = IOUtils.toString(this.getClass().getResourceAsStream("relations.json")); - final List relations = mapper.readValue(json, new TypeReference>() { }); + @Test + public void doTesSorting() throws IOException { + final ObjectMapper mapper = new ObjectMapper(); + final String json = IOUtils.toString(this.getClass().getResourceAsStream("relations.json")); + final List relations = mapper.readValue(json, new TypeReference>() { + }); + relations + .stream() + .map(r -> SortableRelationKey.create(r, r.getSource())) + .sorted() + .forEach( - relations.stream().map(r -> SortableRelationKey.create(r, r.getSource())).sorted() - .forEach( - - it -> { - try { - System.out.println(mapper.writeValueAsString(it)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); - - - - - - - } + it -> { + try { + System.out.println(mapper.writeValueAsString(it)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); + } }