dnet-hadoop/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Block.java

81 lines
1.8 KiB
Java

package eu.dnetlib.dhp.oa.dedup.model;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import com.google.common.collect.Lists;
import eu.dnetlib.pace.model.MapDocument;
/**
 * A keyed group of {@link MapDocument}s.
 *
 * <p>A block is a plain serializable bean (no-arg constructor plus getters/setters) holding a key
 * and the list of documents associated with it. The static {@code from} factories build a block
 * from a single document, or merge several blocks into one whose documents are sorted by a given
 * field and truncated to a maximum size.
 */
public class Block implements Serializable {

    /** Key shared by the documents of this block. */
    private String key;

    /** Documents of this block; {@code null} until set (the no-arg constructor does not initialize it). */
    private List<MapDocument> documents;

    /** Creates an empty block with no key and no document list. */
    public Block() {
        super();
    }

    /**
     * Creates a block holding a single document.
     *
     * @param key the key of the new block
     * @param doc the only document of the new block
     * @return a new block whose document list is a mutable list containing {@code doc}
     */
    public static Block from(String key, MapDocument doc) {
        Block block = new Block();
        block.setKey(key);
        block.setDocuments(Lists.newArrayList(doc));
        return block;
    }

    /**
     * Merges the documents of all the given blocks into a single block.
     *
     * <p>The documents are sorted by the string value of {@code orderField} and only the first
     * {@code maxSize} are kept. Blocks whose document list is {@code null} contribute no documents.
     *
     * @param key the key of the resulting block
     * @param blocks the blocks to merge; the iterator is consumed by this call
     * @param orderField name of the field whose string value drives the ordering; every document is
     *        expected to provide it, otherwise the sort fails with a {@link NullPointerException}
     * @param maxSize maximum number of documents kept in the resulting block
     * @return a new block with the given key and at most {@code maxSize} documents
     * @throws IllegalArgumentException if {@code maxSize} is negative
     */
    public static Block from(String key, Iterator<Block> blocks, String orderField, int maxSize) {
        // Wrap the one-shot iterator so it can be exposed as a sequential stream.
        Iterable<Block> it = () -> blocks;
        Stream<MapDocument> merged = StreamSupport
            .stream(it.spliterator(), false)
            .flatMap(Block::documentStream);
        return sortedAndLimited(key, merged, orderField, maxSize);
    }

    /**
     * Merges the documents of two blocks into a single block.
     *
     * <p>The resulting block takes its key from {@code b1}; the key of {@code b2} is ignored. The
     * documents are sorted by the string value of {@code orderField} and only the first
     * {@code maxSize} are kept. A block whose document list is {@code null} contributes no
     * documents.
     *
     * @param b1 the first block, which also provides the key of the result
     * @param b2 the second block
     * @param orderField name of the field whose string value drives the ordering; every document is
     *        expected to provide it, otherwise the sort fails with a {@link NullPointerException}
     * @param maxSize maximum number of documents kept in the resulting block
     * @return a new block with the key of {@code b1} and at most {@code maxSize} documents
     * @throws IllegalArgumentException if {@code maxSize} is negative
     */
    public static Block from(Block b1, Block b2, String orderField, int maxSize) {
        Stream<MapDocument> merged = Stream.concat(documentStream(b1), documentStream(b2));
        return sortedAndLimited(b1.getKey(), merged, orderField, maxSize);
    }

    /**
     * Streams the documents of a block, treating a missing ({@code null}) document list as empty
     * so that default-constructed blocks can take part in a merge.
     */
    private static Stream<MapDocument> documentStream(Block block) {
        List<MapDocument> docs = block.getDocuments();
        if (docs == null) {
            return Stream.empty();
        }
        return docs.stream();
    }

    /**
     * Builds a block from the given documents, sorted by the string value of {@code orderField}
     * and truncated to {@code maxSize} elements. Shared by the merging factories so that both
     * apply exactly the same ordering and size cap.
     */
    private static Block sortedAndLimited(String key, Stream<MapDocument> docs, String orderField, int maxSize) {
        Block block = new Block();
        block.setKey(key);
        block
            .setDocuments(
                docs
                    .sorted(Comparator.comparing(a -> a.getFieldMap().get(orderField).stringValue()))
                    .limit(maxSize)
                    // collect into an ArrayList so the resulting list stays mutable
                    .collect(Collectors.toCollection(ArrayList::new)));
        return block;
    }

    /**
     * @return the key of this block, or {@code null} if it was never set
     */
    public String getKey() {
        return key;
    }

    /**
     * @param key the key to assign to this block
     */
    public void setKey(String key) {
        this.key = key;
    }

    /**
     * @return the document list of this block as stored (no copy is made), or {@code null} if it
     *         was never set
     */
    public List<MapDocument> getDocuments() {
        return documents;
    }

    /**
     * @param documents the document list to store; the reference is kept as is (no copy is made)
     */
    public void setDocuments(List<MapDocument> documents) {
        this.documents = documents;
    }
}