1
0
Fork 0

Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop

This commit is contained in:
Miriam Baglioni 2020-05-18 13:07:36 +02:00
commit 629af7cb79
4 changed files with 39 additions and 27 deletions

View File

@@ -129,6 +129,9 @@ public class DedupUtility {
.max(Comparator.comparing(Tuple2::_1));
if (simAuhtor.isPresent() && simAuhtor.get()._1() > THRESHOLD) {
Author r = simAuhtor.get()._2();
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
r.getPid().add(a._1());
}
});

View File

@@ -53,9 +53,7 @@ public class PrepareResultCommunitySetStep2 {
conf,
isSparkSessionManaged,
spark -> {
if (isTest(parser)) {
removeOutputDir(spark, outputPath);
}
mergeInfo(spark, inputPath, outputPath);
});
}

View File

@@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.*;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;
@@ -16,6 +15,7 @@ import org.dom4j.Node;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@@ -42,9 +42,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final String pid = e.attributeValue("nameIdentifier");
final String pidType = e.attributeValue("nameIdentifierScheme");
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
author.setPid(new ArrayList<>());
author.getPid().add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
author
.getPid()
.add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
}
res.add(author);

View File

@@ -12,8 +12,6 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach;
@@ -22,6 +20,9 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
@ExtendWith(MockitoExtension.class)
public class MappersTest {
@@ -53,12 +54,15 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertTrue(p.getAuthor().size() > 0);
Optional<Author> author = p.getAuthor()
Optional<Author> author = p
.getAuthor()
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent());
StructuredProperty pid = author.get().getPid()
StructuredProperty pid = author
.get()
.getPid()
.stream()
.findFirst()
.get();
@@ -117,12 +121,15 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
assertTrue(d.getAuthor().size() > 0);
Optional<Author> author = d.getAuthor()
Optional<Author> author = d
.getAuthor()
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent());
StructuredProperty pid = author.get().getPid()
StructuredProperty pid = author
.get()
.getPid()
.stream()
.findFirst()
.get();
@@ -136,7 +143,9 @@ public class MappersTest {
assertEquals("Theo", author.get().getName());
assertEquals(1, author.get().getAffiliation().size());
Optional<Field<String>> opAff = author.get().getAffiliation()
Optional<Field<String>> opAff = author
.get()
.getAffiliation()
.stream()
.findFirst();
assertTrue(opAff.isPresent());