Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop

This commit is contained in:
Miriam Baglioni 2020-05-18 13:07:36 +02:00
commit 629af7cb79
4 changed files with 39 additions and 27 deletions

View File

@@ -129,6 +129,9 @@ public class DedupUtility {
.max(Comparator.comparing(Tuple2::_1)); .max(Comparator.comparing(Tuple2::_1));
if (simAuhtor.isPresent() && simAuhtor.get()._1() > THRESHOLD) { if (simAuhtor.isPresent() && simAuhtor.get()._1() > THRESHOLD) {
Author r = simAuhtor.get()._2(); Author r = simAuhtor.get()._2();
if (r.getPid() == null) {
r.setPid(new ArrayList<>());
}
r.getPid().add(a._1()); r.getPid().add(a._1());
} }
}); });

View File

@@ -53,9 +53,7 @@ public class PrepareResultCommunitySetStep2 {
conf, conf,
isSparkSessionManaged, isSparkSessionManaged,
spark -> { spark -> {
if (isTest(parser)) { removeOutputDir(spark, outputPath);
removeOutputDir(spark, outputPath);
}
mergeInfo(spark, inputPath, outputPath); mergeInfo(spark, inputPath, outputPath);
}); });
} }

View File

@@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Element; import org.dom4j.Element;
@@ -16,6 +15,7 @@ import org.dom4j.Node;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson; import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
public class OafToOafMapper extends AbstractMdRecordToOafMapper { public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@@ -42,9 +42,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
final String pid = e.attributeValue("nameIdentifier"); final String pid = e.attributeValue("nameIdentifier");
final String pidType = e.attributeValue("nameIdentifierScheme"); final String pidType = e.attributeValue("nameIdentifierScheme");
author.setPid(new ArrayList<>());
if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) { if (StringUtils.isNotBlank(pid) && StringUtils.isNotBlank(pidType)) {
author.setPid(new ArrayList<>()); author
author.getPid().add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info)); .getPid()
.add(structuredProperty(pid, qualifier(pidType, pidType, DNET_PID_TYPES, DNET_PID_TYPES), info));
} }
res.add(author); res.add(author);

View File

@@ -12,8 +12,6 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@@ -22,6 +20,9 @@ import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock; import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class MappersTest { public class MappersTest {
@@ -53,15 +54,18 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertTrue(p.getAuthor().size() > 0); assertTrue(p.getAuthor().size() > 0);
Optional<Author> author = p.getAuthor() Optional<Author> author = p
.stream() .getAuthor()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .stream()
.findFirst(); .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent()); assertTrue(author.isPresent());
StructuredProperty pid = author.get().getPid() StructuredProperty pid = author
.stream() .get()
.findFirst() .getPid()
.get(); .stream()
.findFirst()
.get();
assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals("0000-0001-6651-1178", pid.getValue());
assertEquals("ORCID", pid.getQualifier().getClassid()); assertEquals("ORCID", pid.getQualifier().getClassid());
assertEquals("ORCID", pid.getQualifier().getClassname()); assertEquals("ORCID", pid.getQualifier().getClassname());
@@ -117,15 +121,18 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
assertTrue(d.getAuthor().size() > 0); assertTrue(d.getAuthor().size() > 0);
Optional<Author> author = d.getAuthor() Optional<Author> author = d
.stream() .getAuthor()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .stream()
.findFirst(); .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.findFirst();
assertTrue(author.isPresent()); assertTrue(author.isPresent());
StructuredProperty pid = author.get().getPid() StructuredProperty pid = author
.stream() .get()
.findFirst() .getPid()
.get(); .stream()
.findFirst()
.get();
assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals("0000-0001-9074-1619", pid.getValue());
assertEquals("ORCID", pid.getQualifier().getClassid()); assertEquals("ORCID", pid.getQualifier().getClassid());
assertEquals("ORCID", pid.getQualifier().getClassname()); assertEquals("ORCID", pid.getQualifier().getClassname());
@@ -136,9 +143,11 @@ public class MappersTest {
assertEquals("Theo", author.get().getName()); assertEquals("Theo", author.get().getName());
assertEquals(1, author.get().getAffiliation().size()); assertEquals(1, author.get().getAffiliation().size());
Optional<Field<String>> opAff = author.get().getAffiliation() Optional<Field<String>> opAff = author
.stream() .get()
.findFirst(); .getAffiliation()
.stream()
.findFirst();
assertTrue(opAff.isPresent()); assertTrue(opAff.isPresent());
Field<String> affiliation = opAff.get(); Field<String> affiliation = opAff.get();
assertEquals("ISTI-CNR", affiliation.getValue()); assertEquals("ISTI-CNR", affiliation.getValue());