forked from D-Net/dnet-hadoop
Limit the number of authors kept in XML records to MAX_AUTHORS = 200; authors with an ORCID are always retained, so a record may still exceed that limit.
This commit is contained in:
parent
ef11593068
commit
83504ecace
|
@ -5,7 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
@ -22,6 +24,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
|
@ -63,6 +66,8 @@ public class CreateRelatedEntitiesJob_phase2 {
|
|||
|
||||
private static final int MAX_EXTERNAL_ENTITIES = 50;
|
||||
|
||||
private static final int MAX_AUTHORS = 200;
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
String jsonConfiguration = IOUtils
|
||||
|
@ -205,6 +210,16 @@ public class CreateRelatedEntitiesJob_phase2 {
|
|||
.collect(Collectors.toList());
|
||||
r.setExternalReference(refs);
|
||||
}
|
||||
if (r.getAuthor() != null && r.getAuthor().size() > MAX_AUTHORS) {
|
||||
List<Author> authors = Lists.newArrayList();
|
||||
for (int i = 0; i < r.getAuthor().size(); i++) {
|
||||
final Author a = r.getAuthor().get(i);
|
||||
if (authors.size() < MAX_AUTHORS || hasORCID(a)) {
|
||||
authors.add(a);
|
||||
}
|
||||
}
|
||||
r.setAuthor(authors);
|
||||
}
|
||||
}
|
||||
return e;
|
||||
}, Encoders.bean(entityClazz))
|
||||
|
@ -214,6 +229,18 @@ public class CreateRelatedEntitiesJob_phase2 {
|
|||
Encoders.bean(TypedRow.class));
|
||||
}
|
||||
|
||||
private static boolean hasORCID(Author a) {
|
||||
return a.getPid() != null && a
|
||||
.getPid()
|
||||
.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(StructuredProperty::getQualifier)
|
||||
.filter(Objects::nonNull)
|
||||
.map(Qualifier::getClassid)
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.anyMatch(c -> "orcid".equals(c.toLowerCase()));
|
||||
}
|
||||
|
||||
private static TypedRow getTypedRow(String type, OafEntity entity)
|
||||
throws JsonProcessingException {
|
||||
TypedRow t = new TypedRow();
|
||||
|
|
Loading…
Reference in New Issue