2020-07-13 10:11:36 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.oa.dedup;
|
|
|
|
|
|
|
|
import java.util.Objects;
|
|
|
|
|
|
|
|
import org.apache.spark.sql.Encoder;
|
|
|
|
import org.apache.spark.sql.Encoders;
|
|
|
|
import org.apache.spark.sql.expressions.Aggregator;
|
|
|
|
|
|
|
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
|
|
|
|
|
|
public class RelationAggregator extends Aggregator<Relation, Relation, Relation> {
|
|
|
|
|
|
|
|
private static Relation ZERO = new Relation();
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Relation zero() {
|
|
|
|
return ZERO;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Relation reduce(Relation b, Relation a) {
|
2020-07-13 16:13:36 +02:00
|
|
|
return mergeRel(b, a);
|
2020-07-13 10:11:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Relation merge(Relation b, Relation a) {
|
2020-07-13 16:13:36 +02:00
|
|
|
return mergeRel(b, a);
|
2020-07-13 10:11:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Relation finish(Relation r) {
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2020-07-13 16:13:36 +02:00
|
|
|
private Relation mergeRel(Relation b, Relation a) {
|
|
|
|
if (Objects.equals(b, ZERO)) {
|
|
|
|
return a;
|
|
|
|
}
|
|
|
|
if (Objects.equals(a, ZERO)) {
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
|
|
|
|
b.mergeFrom(a);
|
|
|
|
return b;
|
|
|
|
}
|
|
|
|
|
2020-07-13 10:11:36 +02:00
|
|
|
@Override
|
|
|
|
public Encoder<Relation> bufferEncoder() {
|
2020-07-13 15:54:51 +02:00
|
|
|
return Encoders.kryo(Relation.class);
|
2020-07-13 10:11:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Encoder<Relation> outputEncoder() {
|
2020-07-13 15:54:51 +02:00
|
|
|
return Encoders.kryo(Relation.class);
|
2020-07-13 10:11:36 +02:00
|
|
|
}
|
|
|
|
}
|