forked from D-Net/dnet-hadoop
limiting the maximum number of authors allowed in XML records to MAX_AUTHORS = 200; authors with ORCID can exceed that limit
parent ef11593068
commit 83504ecace
@@ -5,7 +5,9 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import java.util.Optional;
+import java.util.function.Predicate;
 import java.util.stream.Collectors;
 
 import org.apache.commons.io.IOUtils;
@@ -22,6 +24,7 @@ import org.slf4j.LoggerFactory;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
 
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
@@ -63,6 +66,8 @@ public class CreateRelatedEntitiesJob_phase2 {
 
 	private static final int MAX_EXTERNAL_ENTITIES = 50;
 
+	private static final int MAX_AUTHORS = 200;
+
 	public static void main(String[] args) throws Exception {
 
 		String jsonConfiguration = IOUtils
@@ -205,6 +210,16 @@ public class CreateRelatedEntitiesJob_phase2 {
 						.collect(Collectors.toList());
 					r.setExternalReference(refs);
 				}
+				if (r.getAuthor() != null && r.getAuthor().size() > MAX_AUTHORS) {
+					List<Author> authors = Lists.newArrayList();
+					for (int i = 0; i < r.getAuthor().size(); i++) {
+						final Author a = r.getAuthor().get(i);
+						if (authors.size() < MAX_AUTHORS || hasORCID(a)) {
+							authors.add(a);
+						}
+					}
+					r.setAuthor(authors);
+				}
 			}
 			return e;
 		}, Encoders.bean(entityClazz))
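Note: the selection rule added above keeps the first MAX_AUTHORS authors in document order and additionally keeps any later author that carries an ORCID pid; an ORCID author inside the first 200 simply occupies one of the 200 slots. Below is a minimal standalone sketch of that behaviour, not part of the commit. It assumes the bean-style setters of the eu.dnetlib.dhp.schema.oaf classes (Author.setFullname/setPid, StructuredProperty.setQualifier/setValue, Qualifier.setClassid); the author names, pid value and ORCID position are invented for illustration.

import java.util.ArrayList;
import java.util.List;

import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

public class AuthorCapSketch {

	private static final int MAX_AUTHORS = 200;

	public static void main(String[] args) {
		// build 250 authors; only the one at index 240 carries an ORCID pid
		List<Author> input = new ArrayList<>();
		for (int i = 0; i < 250; i++) {
			Author a = new Author();
			a.setFullname("author-" + i);
			if (i == 240) {
				Qualifier q = new Qualifier();
				q.setClassid("orcid");
				StructuredProperty pid = new StructuredProperty();
				pid.setQualifier(q);
				pid.setValue("0000-0000-0000-0000");
				List<StructuredProperty> pids = new ArrayList<>();
				pids.add(pid);
				a.setPid(pids);
			}
			input.add(a);
		}

		// same selection rule as the patched map function: once MAX_AUTHORS
		// authors have been kept, only authors with an ORCID pid are added
		List<Author> kept = new ArrayList<>();
		for (Author a : input) {
			boolean orcid = a.getPid() != null
				&& a.getPid().stream()
					.anyMatch(p -> p != null && p.getQualifier() != null
						&& "orcid".equalsIgnoreCase(p.getQualifier().getClassid()));
			if (kept.size() < MAX_AUTHORS || orcid) {
				kept.add(a);
			}
		}

		System.out.println(kept.size()); // 201: the first 200 authors plus the ORCID author at index 240
	}
}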
@@ -214,6 +229,18 @@ public class CreateRelatedEntitiesJob_phase2 {
 				Encoders.bean(TypedRow.class));
 	}
 
+	private static boolean hasORCID(Author a) {
+		return a.getPid() != null && a
+			.getPid()
+			.stream()
+			.filter(Objects::nonNull)
+			.map(StructuredProperty::getQualifier)
+			.filter(Objects::nonNull)
+			.map(Qualifier::getClassid)
+			.filter(StringUtils::isNotBlank)
+			.anyMatch(c -> "orcid".equals(c.toLowerCase()));
+	}
+
 	private static TypedRow getTypedRow(String type, OafEntity entity)
 		throws JsonProcessingException {
 		TypedRow t = new TypedRow();
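Note: a small sketch, not part of the commit, of how the new hasORCID predicate is expected to behave, under the same assumptions about the oaf schema setters as above; the pid value is invented. Matching is case-insensitive on the qualifier classid, while null pid entries, null qualifiers and blank classids are filtered out before the comparison.

import java.util.Collections;

import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

public class HasOrcidSketch {

	public static void main(String[] args) {
		System.out.println(hasOrcid(authorWithClassid("ORCID"))); // true: classid comparison is case-insensitive
		System.out.println(hasOrcid(authorWithClassid("doi")));   // false: not an ORCID classid
		System.out.println(hasOrcid(new Author()));               // false: a null pid list is tolerated
	}

	// builds an Author with a single pid whose qualifier classid is the given value
	private static Author authorWithClassid(String classid) {
		Qualifier q = new Qualifier();
		q.setClassid(classid);
		StructuredProperty pid = new StructuredProperty();
		pid.setQualifier(q);
		pid.setValue("0000-0000-0000-0000");
		Author a = new Author();
		a.setPid(Collections.singletonList(pid));
		return a;
	}

	// simplified mirror of the hasORCID method added in this commit
	private static boolean hasOrcid(Author a) {
		return a.getPid() != null
			&& a.getPid().stream()
				.anyMatch(p -> p != null
					&& p.getQualifier() != null
					&& "orcid".equalsIgnoreCase(p.getQualifier().getClassid()));
	}
}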