Added PacePerson to derive the name and surname of authors that have only the full name set

This commit is contained in:
Miriam Baglioni 2020-05-25 10:34:30 +02:00
parent b258f99ece
commit 8f51af4e9b
1 changed files with 56 additions and 42 deletions

View File

@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.common.PacePerson;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -129,61 +130,74 @@ public class SparkOrcidToResultFromSemRelJob {
} }
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
boolean toaddpid = false; boolean toaddpid = false;
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { String author_name = author.getName();
if (StringUtils.isNotEmpty(author.getSurname())) { String author_surname = author.getSurname();
if (autoritative_author
.getSurname()
.trim()
.equalsIgnoreCase(author.getSurname().trim())) {
// have the same surname. Check the name if(StringUtils.isEmpty(author_name) || StringUtils.isEmpty(author_surname)){
if (StringUtils.isNotEmpty(autoritative_author.getName())) { PacePerson pp = new PacePerson(author.getFullname(), false);
if (StringUtils.isNotEmpty(author.getName())) { if (pp.isAccurate()){
if (autoritative_author author_name = pp.getNormalisedFirstName();
.getName() author_surname = pp.getNormalisedSurname();
.trim()
.equalsIgnoreCase(author.getName().trim())) { }
toaddpid = true; }
}
// they could be differently written (i.e. only the initials of the name if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
// in one of the two if (StringUtils.isNotEmpty(author_surname)) {
else { if (autoritative_author
.getSurname()
.trim()
.equalsIgnoreCase(author_surname.trim())) {
// have the same surname. Check the name
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
if (StringUtils.isNotEmpty(author_name)) {
if (autoritative_author if (autoritative_author
.getName() .getName()
.trim() .trim()
.substring(0, 0) .equalsIgnoreCase(author_name.trim())) {
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
toaddpid = true; toaddpid = true;
} }
// they could be differently written (i.e. only the initials of the name
// in one of the two
else {
if (autoritative_author
.getName()
.trim()
.substring(0, 0)
.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
toaddpid = true;
}
}
} }
} }
} }
} }
} }
} if (toaddpid) {
if (toaddpid) { StructuredProperty p = new StructuredProperty();
StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid());
p.setValue(autoritative_author.getOrcid()); p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID)); p
p .setDataInfo(
.setDataInfo( getDataInfo(
getDataInfo( PROPAGATION_DATA_INFO_TYPE,
PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
if (authorPid.isPresent()) {
authorPid.get().add(p);
} else {
author.setPid(Lists.newArrayList(p));
}
Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
if (authorPid.isPresent()) {
authorPid.get().add(p);
} else {
author.setPid(Lists.newArrayList(p));
} }
return toaddpid;
} }
return toaddpid;
}
private static boolean containsAllowedPid(Author a) { private static boolean containsAllowedPid(Author a) {
Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid()); Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid());