2023-11-14 12:04:29 +01:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.collection.orcid;
|
|
|
|
|
|
|
|
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;

import eu.dnetlib.dhp.collection.orcid.model.Author;
import eu.dnetlib.dhp.collection.orcid.model.ORCIDItem;
import eu.dnetlib.dhp.collection.orcid.model.Work;
import eu.dnetlib.dhp.parser.utility.VtdException;
|
|
|
|
|
|
|
|
public class DownloadORCIDTest {
|
|
|
|
private final Logger log = LoggerFactory.getLogger(DownloadORCIDTest.class);
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testSummary() throws Exception {
|
|
|
|
final String xml = IOUtils
|
|
|
|
.toString(
|
|
|
|
Objects.requireNonNull(getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/summary.xml")));
|
|
|
|
|
|
|
|
final OrcidParser parser = new OrcidParser();
|
|
|
|
ORCIDItem orcidItem = parser.parseSummary(xml);
|
|
|
|
|
|
|
|
final ObjectMapper mapper = new ObjectMapper();
|
|
|
|
System.out.println(mapper.writeValueAsString(orcidItem));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testParsingWork() throws Exception {
|
|
|
|
|
|
|
|
final List<String> works_path = Arrays
|
|
|
|
.asList(
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-2536-4498.xml",
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0002-5982-8983.xml",
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191.xml",
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191-similarity.xml",
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/activity_work_0000-0003-2760-1191_contributors.xml"
|
|
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
final OrcidParser parser = new OrcidParser();
|
|
|
|
final ObjectMapper mapper = new ObjectMapper();
|
|
|
|
works_path.stream().map(s -> {
|
|
|
|
try {
|
|
|
|
return IOUtils
|
|
|
|
.toString(
|
|
|
|
Objects
|
|
|
|
.requireNonNull(
|
|
|
|
getClass()
|
|
|
|
.getResourceAsStream(
|
|
|
|
s)));
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}).forEach(s -> {
|
|
|
|
try {
|
|
|
|
System.out.println(mapper.writeValueAsString(parser.parseWork(s)));
|
|
|
|
} catch (Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2024-01-25 19:40:59 +01:00
|
|
|
@Test
|
|
|
|
public void testParsingOrcidUpdateEmployments() throws Exception {
|
|
|
|
final String xml = IOUtils
|
|
|
|
.toString(
|
|
|
|
Objects
|
|
|
|
.requireNonNull(
|
|
|
|
getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/update_employments.xml")));
|
|
|
|
|
|
|
|
final OrcidParser parser = new OrcidParser();
|
|
|
|
final ObjectMapper mapper = new ObjectMapper();
|
|
|
|
System.out.println(mapper.writeValueAsString(parser.parseEmployments(xml)));
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testParsingOrcidUpdateWorks() throws Exception {
|
|
|
|
final String xml = IOUtils
|
|
|
|
.toString(
|
|
|
|
Objects
|
|
|
|
.requireNonNull(
|
|
|
|
getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/orcid/update_work.xml")));
|
|
|
|
|
|
|
|
final OrcidParser parser = new OrcidParser();
|
|
|
|
final List<Work> works = parser.parseWorks(xml);
|
|
|
|
|
|
|
|
final ObjectMapper mapper = new ObjectMapper();
|
|
|
|
System.out.println(mapper.writeValueAsString(works));
|
|
|
|
}
|
|
|
|
|
2023-11-14 12:04:29 +01:00
|
|
|
@Test
|
|
|
|
public void testParsingEmployments() throws Exception {
|
|
|
|
|
|
|
|
final List<String> works_path = Arrays
|
|
|
|
.asList(
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/employment.xml",
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/employment_2.xml",
|
|
|
|
"/eu/dnetlib/dhp/collection/orcid/employment_3.xml"
|
|
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
final OrcidParser parser = new OrcidParser();
|
|
|
|
final ObjectMapper mapper = new ObjectMapper();
|
|
|
|
works_path.stream().map(s -> {
|
|
|
|
try {
|
|
|
|
return IOUtils
|
|
|
|
.toString(
|
|
|
|
Objects
|
|
|
|
.requireNonNull(
|
|
|
|
getClass()
|
|
|
|
.getResourceAsStream(
|
|
|
|
s)));
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}).forEach(s -> {
|
|
|
|
try {
|
|
|
|
System.out.println(mapper.writeValueAsString(parser.parseEmployment(s)));
|
|
|
|
} catch (Exception e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|