forked from D-Net/dnet-hadoop
WIP: prepare relation job
This commit is contained in:
parent
a6c0faac70
commit
6933ec11fb
|
@ -7,6 +7,8 @@ import java.io.Serializable;
|
|||
import java.util.*;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -21,6 +23,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
import com.google.common.collect.Iterables;
|
||||
|
@ -142,7 +145,7 @@ public class PrepareRelationsJob {
|
|||
.repartitionAndSortWithinPartitions(new RelationPartitioner(relPartitions))
|
||||
.groupBy(Tuple2::_1)
|
||||
.map(Tuple2::_2)
|
||||
.map(t -> Iterables.limit(t, maxRelations))
|
||||
.map(t -> Iterables.filter(t, input -> input._1().getSubRelType().equals("outcome")))
|
||||
.flatMap(Iterable::iterator)
|
||||
.map(Tuple2::_2)
|
||||
|
||||
|
@ -151,7 +154,8 @@ public class PrepareRelationsJob {
|
|||
.repartitionAndSortWithinPartitions(new RelationPartitioner(relPartitions))
|
||||
.groupBy(Tuple2::_1)
|
||||
.map(Tuple2::_2)
|
||||
.map(t -> Iterables.limit(t, maxRelations))
|
||||
.map(t -> Iterables.filter(t, input -> input._1().getSubRelType().equals("outcome")))
|
||||
// .map(t -> Iterables.limit(t, maxRelations))
|
||||
.flatMap(Iterable::iterator)
|
||||
.map(Tuple2::_2)
|
||||
.rdd();
|
||||
|
|
|
@ -62,8 +62,9 @@ public class SortableRelationKey implements Comparable<SortableRelationKey>, Ser
|
|||
public int compareTo(SortableRelationKey o) {
|
||||
return ComparisonChain
|
||||
.start()
|
||||
.compare(getGroupingKey(), o.getGroupingKey())
|
||||
.compare(getWeight(this), getWeight(o))
|
||||
.result() * -1;
|
||||
.result();
|
||||
}
|
||||
|
||||
private Integer getWeight(SortableRelationKey o) {
|
||||
|
|
|
@ -1,14 +1,18 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.provision;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.provision.model.SortableRelationKey;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public class SortableRelationKeyTest {
|
||||
|
||||
|
@ -16,10 +20,13 @@ public class SortableRelationKeyTest {
|
|||
public void doTesSorting() throws IOException {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
final String json = IOUtils.toString(this.getClass().getResourceAsStream("relations.json"));
|
||||
final List<Relation> relations = mapper.readValue(json, new TypeReference<List<Relation>>() { });
|
||||
final List<Relation> relations = mapper.readValue(json, new TypeReference<List<Relation>>() {
|
||||
});
|
||||
|
||||
|
||||
relations.stream().map(r -> SortableRelationKey.create(r, r.getSource())).sorted()
|
||||
relations
|
||||
.stream()
|
||||
.map(r -> SortableRelationKey.create(r, r.getSource()))
|
||||
.sorted()
|
||||
.forEach(
|
||||
|
||||
it -> {
|
||||
|
@ -30,12 +37,6 @@ public class SortableRelationKeyTest {
|
|||
}
|
||||
});
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue