2020-06-24 08:57:13 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.broker.oa.util.aggregators.withRels;
|
|
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.apache.spark.sql.Encoder;
|
|
|
|
import org.apache.spark.sql.Encoders;
|
|
|
|
import org.apache.spark.sql.expressions.Aggregator;
|
|
|
|
|
|
|
|
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
|
2020-06-25 09:28:13 +02:00
|
|
|
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
2020-06-24 08:57:13 +02:00
|
|
|
import scala.Tuple2;
|
|
|
|
|
|
|
|
public class RelatedDatasetAggregator
|
|
|
|
extends Aggregator<Tuple2<OaBrokerMainEntity, RelatedDataset>, OaBrokerMainEntity, OaBrokerMainEntity> {
|
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
private static final long serialVersionUID = 6969761680131482557L;
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public OaBrokerMainEntity zero() {
|
|
|
|
return new OaBrokerMainEntity();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public OaBrokerMainEntity finish(final OaBrokerMainEntity g) {
|
|
|
|
return g;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public OaBrokerMainEntity reduce(final OaBrokerMainEntity g, final Tuple2<OaBrokerMainEntity, RelatedDataset> t) {
|
2020-06-24 09:24:45 +02:00
|
|
|
final OaBrokerMainEntity res = StringUtils.isNotBlank(g.getOpenaireId()) ? g : t._1;
|
2020-06-25 09:28:13 +02:00
|
|
|
if (t._2 != null && res.getDatasets().size() < BrokerConstants.MAX_NUMBER_OF_RELS) {
|
2020-06-24 09:24:45 +02:00
|
|
|
res.getDatasets().add(t._2.getRelDataset());
|
|
|
|
}
|
2020-06-24 08:57:13 +02:00
|
|
|
return res;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public OaBrokerMainEntity merge(final OaBrokerMainEntity g1, final OaBrokerMainEntity g2) {
|
2020-06-24 09:24:45 +02:00
|
|
|
if (StringUtils.isNotBlank(g1.getOpenaireId())) {
|
2020-06-25 09:28:13 +02:00
|
|
|
final int availables = BrokerConstants.MAX_NUMBER_OF_RELS - g1.getDatasets().size();
|
|
|
|
if (availables > 0) {
|
|
|
|
if (g2.getDatasets().size() <= availables) {
|
|
|
|
g1.getDatasets().addAll(g2.getDatasets());
|
|
|
|
} else {
|
|
|
|
g1.getDatasets().addAll(g2.getDatasets().subList(0, availables));
|
|
|
|
}
|
|
|
|
}
|
2020-06-24 08:57:13 +02:00
|
|
|
return g1;
|
|
|
|
} else {
|
|
|
|
return g2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Encoder<OaBrokerMainEntity> bufferEncoder() {
|
|
|
|
return Encoders.bean(OaBrokerMainEntity.class);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Encoder<OaBrokerMainEntity> outputEncoder() {
|
|
|
|
return Encoders.bean(OaBrokerMainEntity.class);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|