forked from D-Net/dnet-hadoop
refactoring
This commit is contained in:
parent
d3d36647d2
commit
eea07f4c42
|
@ -61,7 +61,6 @@ public class BlackListTest {
|
||||||
spark.stop();
|
spark.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void noRemoveTest() throws Exception {
|
public void noRemoveTest() throws Exception {
|
||||||
SparkRemoveBlacklistedRelationJob
|
SparkRemoveBlacklistedRelationJob
|
||||||
|
|
|
@ -7,7 +7,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.PacePerson;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -23,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.PacePerson;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
@ -124,78 +124,76 @@ public class SparkOrcidToResultFromSemRelJob {
|
||||||
private static void enrichAuthor(Author a, List<AutoritativeAuthor> au) {
|
private static void enrichAuthor(Author a, List<AutoritativeAuthor> au) {
|
||||||
PacePerson pp = new PacePerson(a.getFullname(), false);
|
PacePerson pp = new PacePerson(a.getFullname(), false);
|
||||||
for (AutoritativeAuthor aa : au) {
|
for (AutoritativeAuthor aa : au) {
|
||||||
if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname() )) {
|
if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author,
|
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author,
|
||||||
String author_name,
|
String author_name,
|
||||||
String author_surname) {
|
String author_surname) {
|
||||||
boolean toaddpid = false;
|
boolean toaddpid = false;
|
||||||
|
|
||||||
|
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
|
||||||
|
if (StringUtils.isNotEmpty(author.getSurname())) {
|
||||||
|
author_surname = author.getSurname();
|
||||||
|
}
|
||||||
|
if (StringUtils.isNotEmpty(author_surname)) {
|
||||||
|
if (autoritative_author
|
||||||
|
.getSurname()
|
||||||
|
.trim()
|
||||||
|
.equalsIgnoreCase(author_surname.trim())) {
|
||||||
|
|
||||||
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
|
// have the same surname. Check the name
|
||||||
if (StringUtils.isNotEmpty(author.getSurname())){
|
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
|
||||||
author_surname = author.getSurname();
|
if (StringUtils.isNotEmpty(author.getName())) {
|
||||||
}
|
author_name = author.getName();
|
||||||
if (StringUtils.isNotEmpty(author_surname)) {
|
}
|
||||||
if (autoritative_author
|
if (StringUtils.isNotEmpty(author_name)) {
|
||||||
.getSurname()
|
if (autoritative_author
|
||||||
.trim()
|
.getName()
|
||||||
.equalsIgnoreCase(author_surname.trim())) {
|
.trim()
|
||||||
|
.equalsIgnoreCase(author_name.trim())) {
|
||||||
// have the same surname. Check the name
|
toaddpid = true;
|
||||||
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
|
|
||||||
if(StringUtils.isNotEmpty(author.getName())){
|
|
||||||
author_name = author.getName();
|
|
||||||
}
|
}
|
||||||
if (StringUtils.isNotEmpty(author_name)) {
|
// they could be differently written (i.e. only the initials of the name
|
||||||
|
// in one of the two
|
||||||
|
else {
|
||||||
if (autoritative_author
|
if (autoritative_author
|
||||||
.getName()
|
.getName()
|
||||||
.trim()
|
.trim()
|
||||||
.equalsIgnoreCase(author_name.trim())) {
|
.substring(0, 0)
|
||||||
|
.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
|
||||||
toaddpid = true;
|
toaddpid = true;
|
||||||
}
|
}
|
||||||
// they could be differently written (i.e. only the initials of the name
|
|
||||||
// in one of the two
|
|
||||||
else {
|
|
||||||
if (autoritative_author
|
|
||||||
.getName()
|
|
||||||
.trim()
|
|
||||||
.substring(0, 0)
|
|
||||||
.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
|
|
||||||
toaddpid = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (toaddpid) {
|
|
||||||
StructuredProperty p = new StructuredProperty();
|
|
||||||
p.setValue(autoritative_author.getOrcid());
|
|
||||||
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
|
|
||||||
p
|
|
||||||
.setDataInfo(
|
|
||||||
getDataInfo(
|
|
||||||
PROPAGATION_DATA_INFO_TYPE,
|
|
||||||
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
|
|
||||||
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
|
|
||||||
|
|
||||||
Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
|
|
||||||
if (authorPid.isPresent()) {
|
|
||||||
authorPid.get().add(p);
|
|
||||||
} else {
|
|
||||||
author.setPid(Lists.newArrayList(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return toaddpid;
|
|
||||||
}
|
}
|
||||||
|
if (toaddpid) {
|
||||||
|
StructuredProperty p = new StructuredProperty();
|
||||||
|
p.setValue(autoritative_author.getOrcid());
|
||||||
|
p.setQualifier(getQualifier(PROPAGATION_AUTHOR_PID, PROPAGATION_AUTHOR_PID));
|
||||||
|
p
|
||||||
|
.setDataInfo(
|
||||||
|
getDataInfo(
|
||||||
|
PROPAGATION_DATA_INFO_TYPE,
|
||||||
|
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID,
|
||||||
|
PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME));
|
||||||
|
|
||||||
|
Optional<List<StructuredProperty>> authorPid = Optional.ofNullable(author.getPid());
|
||||||
|
if (authorPid.isPresent()) {
|
||||||
|
authorPid.get().add(p);
|
||||||
|
} else {
|
||||||
|
author.setPid(Lists.newArrayList(p));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return toaddpid;
|
||||||
|
}
|
||||||
|
|
||||||
private static boolean containsAllowedPid(Author a) {
|
private static boolean containsAllowedPid(Author a) {
|
||||||
Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid());
|
Optional<List<StructuredProperty>> pids = Optional.ofNullable(a.getPid());
|
||||||
|
|
Loading…
Reference in New Issue