dnet-hadoop/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java

184 lines
6.5 KiB
Java

package eu.dnetlib.doiboost.orcidnodoi.xml;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.text.Normalizer;
import java.util.*;
import javax.validation.constraints.AssertTrue;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.JaccardSimilarity;
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
import org.junit.jupiter.api.Test;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
import jdk.nashorn.internal.ir.annotations.Ignore;
public class OrcidNoDoiTest {
private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class);
String nameA = "Khairy";
String surnameA = "Abdel Dayem";
String otherNameA = "Dayem MKA";
String nameB = "K";
String surnameB = "Abdel-Dayem";
String orcidIdA = "0000-0003-2760-1191";
@Test
public void readPublicationFieldsTest()
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
logger.info("running loadPublicationFieldsTest ....");
String xml = IOUtils
.toString(
OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0002-2536-4498.xml"));
if (xml == null) {
logger.info("Resource not found");
}
XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
if (p == null) {
logger.info("XMLRecordParserNoDoi null");
}
WorkDataNoDoi workData = null;
try {
workData = p.VTDParseWorkData(xml.getBytes());
} catch (Exception e) {
logger.error("parsing xml", e);
}
assertNotNull(workData);
assertNotNull(workData.getOid());
logger.info("oid: " + workData.getOid());
assertNotNull(workData.getTitles());
logger.info("titles: ");
workData.getTitles().forEach(t -> {
logger.info(t);
});
logger.info("source: " + workData.getSourceName());
logger.info("type: " + workData.getType());
logger.info("urls: ");
workData.getUrls().forEach(u -> {
logger.info(u);
});
logger.info("publication date: ");
workData.getPublicationDates().forEach(d -> {
logger.info(d.getYear() + " - " + d.getMonth() + " - " + d.getDay());
});
logger.info("external id: ");
workData.getExtIds().removeIf(e -> e.getRelationShip() != null && !e.getRelationShip().equals("self"));
workData.getExtIds().forEach(e -> {
logger.info(e.getType() + " - " + e.getValue() + " - " + e.getRelationShip());
});
logger.info("contributors: ");
workData.getContributors().forEach(c -> {
logger
.info(
c.getName() + " - " + c.getRole() + " - " + c.getSequence());
});
}
@Test
public void authorMatchTest() throws Exception {
logger.info("running authorSimpleMatchTest ....");
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
AuthorData author = new AuthorData();
author.setName(nameA);
author.setSurname(surnameA);
author.setOid(orcidIdA);
String xml = IOUtils
.toString(
OrcidNoDoiTest.class.getResourceAsStream(orcidWork));
if (xml == null) {
logger.info("Resource not found");
}
XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
if (p == null) {
logger.info("XMLRecordParserNoDoi null");
}
WorkDataNoDoi workData = null;
try {
workData = p.VTDParseWorkData(xml.getBytes());
} catch (Exception e) {
logger.error("parsing xml", e);
}
assertNotNull(workData);
Contributor a = workData.getContributors().get(0);
assertTrue(a.getCreditName().equals("Abdel-Dayem K"));
AuthorMatcher.match(author, workData.getContributors());
GsonBuilder builder = new GsonBuilder();
Gson gson = builder.create();
logger.info(gson.toJson(workData));
assertTrue(workData.getContributors().size() == 6);
Contributor c = workData.getContributors().get(0);
assertTrue(c.getOid().equals("0000-0003-2760-1191"));
assertTrue(c.getName().equals("Khairy"));
assertTrue(c.getSurname().equals("Abdel Dayem"));
assertTrue(c.getCreditName().equals("Abdel-Dayem K"));
}
@Test
public void readContributorsTest()
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
logger.info("running loadPublicationFieldsTest ....");
String xml = IOUtils
.toString(
OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml"));
if (xml == null) {
logger.info("Resource not found");
}
XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
if (p == null) {
logger.info("XMLRecordParserNoDoi null");
}
WorkDataNoDoi workData = null;
try {
workData = p.VTDParseWorkData(xml.getBytes());
} catch (Exception e) {
logger.error("parsing xml", e);
}
assertNotNull(workData.getContributors());
assertTrue(workData.getContributors().size() == 5);
assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName()));
assertTrue(workData.getContributors().get(0).getSequence().equals("seq0"));
assertTrue(workData.getContributors().get(0).getRole().equals("role0"));
assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence()));
assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole()));
assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2"));
assertTrue(workData.getContributors().get(2).getSequence().equals("seq2"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole()));
assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence()));
assertTrue(workData.getContributors().get(3).getRole().equals("role3"));
assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName()));
assertTrue(workData.getContributors().get(4).getSequence().equals("seq4"));
assertTrue(workData.getContributors().get(4).getRole().equals("role4"));
}
}