1
0
Fork 0

refactoring

This commit is contained in:
Miriam Baglioni 2020-05-26 09:21:49 +02:00
parent d3d36647d2
commit eea07f4c42
2 changed files with 52 additions and 55 deletions

View File

@ -61,7 +61,6 @@ public class BlackListTest {
spark.stop();
}
@Test
public void noRemoveTest() throws Exception {
SparkRemoveBlacklistedRelationJob

View File

@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.List;
import java.util.Optional;
import eu.dnetlib.dhp.common.PacePerson;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@ -124,78 +124,76 @@ public class SparkOrcidToResultFromSemRelJob {
private static void enrichAuthor(Author a, List<AutoritativeAuthor> au) {
PacePerson pp = new PacePerson(a.getFullname(), false);
for (AutoritativeAuthor aa : au) {
if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname() )) {
if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) {
return;
}
}
}
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author,
String author_name,
String author_surname) {
boolean toaddpid = false;
String author_name,
String author_surname) {
boolean toaddpid = false;
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
if (StringUtils.isNotEmpty(author.getSurname())) {
author_surname = author.getSurname();
}
if (StringUtils.isNotEmpty(author_surname)) {
if (autoritative_author
.getSurname()
.trim()
.equalsIgnoreCase(author_surname.trim())) {
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
if (StringUtils.isNotEmpty(author.getSurname())){
author_surname = author.getSurname();
}
if (StringUtils.isNotEmpty(author_surname)) {
if (autoritative_author
.getSurname()
.trim()
.equalsIgnoreCase(author_surname.trim())) {
// have the same surname. Check the name
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
if(StringUtils.isNotEmpty(author.getName())){
author_name = author.getName();
// have the same surname. Check the name
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
if (StringUtils.isNotEmpty(author.getName())) {
author_name = author.getName();
}
if (StringUtils.isNotEmpty(author_name)) {
if (autoritative_author
.getName()
.trim()
.equalsIgnoreCase(author_name.trim())) {
toaddpid = true;
}
if (StringUtils.isNotEmpty(author_name)) {
// they could be differently written (i.e. only the initials of the name
// in one of the two
else {
if (autoritative_author
.getName()
.trim()
.equalsIgnoreCase(author_name.trim())) {
.getName()
.trim()
.substring(0, 0)
.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
toaddpid = true;
}
// they could be differently written (i.e. only the initials of the name
// in one of the two
else {
if (autoritative_author
.getName()
.trim()
.substring(0, 0)
.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
toaddpid = true;
}
}
}
}
}
}
}
if (toaddpid) {
StructuredProperty p = new StructuredProperty();
p.setValue(autoritative_author.getOrcid());
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
p
.setDataInfo(
getDataInfo(
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
if (authorPid.isPresent()) {
authorPid.get().add(p);
} else {
author.setPid(Lists.newArrayList(p));
}
}
return toaddpid;
}
if (toaddpid) {
StructuredProperty p = new StructuredProperty();
p.setValue(autoritative_author.getOrcid());
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
p
.setDataInfo(
getDataInfo(
PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
if (authorPid.isPresent()) {
authorPid.get().add(p);
} else {
author.setPid(Lists.newArrayList(p));
}
}
return toaddpid;
}
private static boolean containsAllowedPid(Author a) {
Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid());