[broker] fixing the mapping of ORCID for the identification of the enrichments #458

Merged
claudio.atzori merged 2 commits from broker_orcid into main 2024-07-15 11:34:02 +02:00
5 changed files with 115 additions and 10 deletions

View File

@ -26,15 +26,15 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
public class PrepareSimpleEntititiesJob { public class PrepareSimpleEntitiesJob {
private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntititiesJob.class); private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntitiesJob.class);
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
PrepareSimpleEntititiesJob.class PrepareSimpleEntitiesJob.class
.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json"))); .getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
parser.parseArgument(args); parser.parseArgument(args);

View File

@ -160,8 +160,7 @@ public class ConversionUtils {
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier() != null)
.filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> StringUtils.startsWithIgnoreCase(pid.getQualifier().getClassid(), ModelConstants.ORCID))
.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))
.map(StructuredProperty::getValue) .map(StructuredProperty::getValue)
.map(ConversionUtils::cleanOrcid) .map(ConversionUtils::cleanOrcid)
.filter(StringUtils::isNotBlank) .filter(StringUtils::isNotBlank)

View File

@ -179,7 +179,7 @@
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>PrepareSimpleEntititiesJob</name> <name>PrepareSimpleEntititiesJob</name>
<class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntititiesJob</class> <class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntitiesJob</class>
<jar>dhp-broker-events-${projectVersion}.jar</jar> <jar>dhp-broker-events-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCores} --executor-cores=${sparkExecutorCores}

View File

@ -0,0 +1,64 @@
package eu.dnetlib.dhp.broker.oa.matchers.simple;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import eu.dnetlib.broker.objects.OaBrokerAuthor;
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
class EnrichMissingAuthorOrcidTest {
final EnrichMissingAuthorOrcid matcher = new EnrichMissingAuthorOrcid();
@BeforeEach
void setUp() throws Exception {}
@Test
void testFindDifferences_1() {
final OaBrokerMainEntity source = new OaBrokerMainEntity();
final OaBrokerMainEntity target = new OaBrokerMainEntity();
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
assertTrue(list.isEmpty());
}
@Test
void testFindDifferences_2() {
final OaBrokerMainEntity source = new OaBrokerMainEntity();
final OaBrokerMainEntity target = new OaBrokerMainEntity();
source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
assertEquals(1, list.size());
}
@Test
void testFindDifferences_3() {
final OaBrokerMainEntity source = new OaBrokerMainEntity();
final OaBrokerMainEntity target = new OaBrokerMainEntity();
source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
assertTrue(list.isEmpty());
}
@Test
void testFindDifferences_4() {
final OaBrokerMainEntity source = new OaBrokerMainEntity();
final OaBrokerMainEntity target = new OaBrokerMainEntity();
source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
assertTrue(list.isEmpty());
}
}

View File

@ -2,27 +2,31 @@
package eu.dnetlib.dhp.broker.oa.util; package eu.dnetlib.dhp.broker.oa.util;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import eu.dnetlib.broker.objects.OaBrokerMainEntity;
import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.broker.objects.OaBrokerTypedValue;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
class ConversionUtilsTest { public class ConversionUtilsTest {
@BeforeEach @BeforeEach
void setUp() throws Exception { public void setUp() throws Exception {}
}
@Test @Test
void testAllResultPids() { public void testAllResultPids() {
final Qualifier qf = new Qualifier(); final Qualifier qf = new Qualifier();
qf.setClassid("test"); qf.setClassid("test");
qf.setClassname("test"); qf.setClassname("test");
@ -91,4 +95,42 @@ class ConversionUtilsTest {
assertEquals(6, list.size()); assertEquals(6, list.size());
} }
public void testOafResultToBrokerResult() {
final Author a1 = createAuthor("Michele Artini", "0000-0002-4406-428X");
final Author a2 = createAuthor("Claudio Atzori", "http://orcid.org/0000-0001-9613-6639");
final Author a3 = createAuthor("Alessia Bardi", null);
final Result r = new Result();
r.setAuthor(Arrays.asList(a1, a2, a3));
final OaBrokerMainEntity br = ConversionUtils.oafResultToBrokerResult(r);
assertEquals(3, br.getCreators().size());
assertEquals("0000-0002-4406-428X", br.getCreators().get(0).getOrcid());
assertEquals("0000-0001-9613-6639", br.getCreators().get(1).getOrcid());
assertNull(br.getCreators().get(2).getOrcid());
}
private Author createAuthor(final String name, final String orcid) {
final Author a = new Author();
a.setFullname("Michele Artini");
if (orcid != null) {
final Qualifier q = new Qualifier();
q.setClassid(ModelConstants.ORCID);
q.setClassname(ModelConstants.ORCID);
q.setSchemeid("dnet:pids");
q.setSchemename("dnet:pids");
final StructuredProperty pid = new StructuredProperty();
pid.setQualifier(q);
pid.setValue(orcid);
a.setPid(Arrays.asList(pid));
}
return a;
}
} }