2020-06-24 16:29:32 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.doiboost.orcidnodoi.xml;
|
|
|
|
|
2020-06-25 18:43:29 +02:00
|
|
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.text.Normalizer;
|
|
|
|
import java.util.*;
|
|
|
|
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
|
|
import org.apache.commons.text.similarity.JaccardSimilarity;
|
|
|
|
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
|
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
import com.google.gson.Gson;
|
|
|
|
import com.google.gson.GsonBuilder;
|
2020-06-24 16:29:32 +02:00
|
|
|
import com.ximpleware.NavException;
|
|
|
|
import com.ximpleware.ParseException;
|
|
|
|
import com.ximpleware.XPathEvalException;
|
|
|
|
import com.ximpleware.XPathParseException;
|
2020-06-25 18:43:29 +02:00
|
|
|
|
2020-06-24 16:29:32 +02:00
|
|
|
import eu.dnetlib.dhp.parser.utility.VtdException;
|
2020-06-25 18:43:29 +02:00
|
|
|
import eu.dnetlib.dhp.schema.oaf.Author;
|
2020-06-24 16:29:32 +02:00
|
|
|
import eu.dnetlib.doiboost.orcid.model.AuthorData;
|
|
|
|
import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
|
|
|
|
import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
|
2020-06-25 18:43:29 +02:00
|
|
|
import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher;
|
2020-06-24 16:29:32 +02:00
|
|
|
import jdk.nashorn.internal.ir.annotations.Ignore;
|
|
|
|
|
|
|
|
public class OrcidNoDoiTest {
|
|
|
|
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class);
|
|
|
|
|
|
|
|
String nameA = "Khairy";
|
|
|
|
String surnameA = "Abdel Dayem";
|
|
|
|
String otherNameA = "Dayem MKA";
|
|
|
|
String nameB = "K";
|
|
|
|
String surnameB = "Abdel-Dayem";
|
|
|
|
String orcidIdA = "0000-0003-2760-1191";
|
|
|
|
|
|
|
|
@Test
|
2020-10-22 15:43:50 +02:00
|
|
|
// @Ignore
|
2020-07-03 23:30:31 +02:00
|
|
|
public void readPublicationFieldsTest()
|
2020-06-24 16:29:32 +02:00
|
|
|
throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
|
|
|
|
logger.info("running loadPublicationFieldsTest ....");
|
|
|
|
String xml = IOUtils
|
|
|
|
.toString(
|
2020-07-03 23:30:31 +02:00
|
|
|
OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0002-2536-4498.xml"));
|
2020-06-24 16:29:32 +02:00
|
|
|
|
|
|
|
if (xml == null) {
|
|
|
|
logger.info("Resource not found");
|
|
|
|
}
|
|
|
|
XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
|
|
|
|
if (p == null) {
|
|
|
|
logger.info("XMLRecordParserNoDoi null");
|
|
|
|
}
|
|
|
|
WorkDataNoDoi workData = null;
|
|
|
|
try {
|
|
|
|
workData = p.VTDParseWorkData(xml.getBytes());
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("parsing xml", e);
|
|
|
|
}
|
|
|
|
assertNotNull(workData);
|
|
|
|
assertNotNull(workData.getOid());
|
|
|
|
logger.info("oid: " + workData.getOid());
|
|
|
|
assertNotNull(workData.getTitles());
|
|
|
|
logger.info("titles: ");
|
|
|
|
workData.getTitles().forEach(t -> {
|
|
|
|
logger.info(t);
|
|
|
|
});
|
|
|
|
logger.info("source: " + workData.getSourceName());
|
|
|
|
logger.info("type: " + workData.getType());
|
|
|
|
logger.info("urls: ");
|
|
|
|
workData.getUrls().forEach(u -> {
|
|
|
|
logger.info(u);
|
|
|
|
});
|
|
|
|
logger.info("publication date: ");
|
|
|
|
workData.getPublicationDates().forEach(d -> {
|
|
|
|
logger.info(d.getYear() + " - " + d.getMonth() + " - " + d.getDay());
|
|
|
|
});
|
|
|
|
logger.info("external id: ");
|
|
|
|
workData.getExtIds().removeIf(e -> e.getRelationShip() != null && !e.getRelationShip().equals("self"));
|
|
|
|
workData.getExtIds().forEach(e -> {
|
|
|
|
logger.info(e.getType() + " - " + e.getValue() + " - " + e.getRelationShip());
|
|
|
|
});
|
|
|
|
logger.info("contributors: ");
|
|
|
|
workData.getContributors().forEach(c -> {
|
|
|
|
logger
|
|
|
|
.info(
|
|
|
|
c.getName() + " - " + c.getRole() + " - " + c.getSequence());
|
|
|
|
});
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
2020-10-22 15:43:50 +02:00
|
|
|
// @Ignore
|
2020-07-02 18:46:20 +02:00
|
|
|
private void authorMatchTest() throws Exception {
|
2020-06-24 16:29:32 +02:00
|
|
|
logger.info("running authorSimpleMatchTest ....");
|
2020-06-25 18:43:29 +02:00
|
|
|
String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml";
|
2020-06-24 16:29:32 +02:00
|
|
|
AuthorData author = new AuthorData();
|
|
|
|
author.setName(nameA);
|
|
|
|
author.setSurname(surnameA);
|
|
|
|
author.setOid(orcidIdA);
|
|
|
|
String xml = IOUtils
|
|
|
|
.toString(
|
|
|
|
OrcidNoDoiTest.class.getResourceAsStream(orcidWork));
|
|
|
|
|
|
|
|
if (xml == null) {
|
|
|
|
logger.info("Resource not found");
|
|
|
|
}
|
|
|
|
XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();
|
|
|
|
if (p == null) {
|
|
|
|
logger.info("XMLRecordParserNoDoi null");
|
|
|
|
}
|
|
|
|
WorkDataNoDoi workData = null;
|
|
|
|
try {
|
|
|
|
workData = p.VTDParseWorkData(xml.getBytes());
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("parsing xml", e);
|
|
|
|
}
|
|
|
|
assertNotNull(workData);
|
2020-06-25 18:43:29 +02:00
|
|
|
AuthorMatcher.match(author, workData.getContributors());
|
|
|
|
GsonBuilder builder = new GsonBuilder();
|
|
|
|
Gson gson = builder.create();
|
|
|
|
logger.info(gson.toJson(workData));
|
2020-06-24 16:29:32 +02:00
|
|
|
}
|
|
|
|
}
|