2020-05-04 11:51:17 +02:00
|
|
|
|
|
|
|
package eu.dnetlib.dhp.oa.provision;
|
|
|
|
|
2021-12-23 12:33:53 +01:00
|
|
|
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;

import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrInputDocument;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import com.google.common.collect.Lists;

import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
|
|
|
|
|
2021-05-13 16:13:28 +02:00
|
|
|
/**
|
|
|
|
* This test can be used to produce a record that can be manually fed to Solr in XML format.
|
|
|
|
*
|
|
|
|
* The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities.
|
|
|
|
*/
|
2020-05-04 11:51:17 +02:00
|
|
|
public class IndexRecordTransformerTest {
|
|
|
|
|
2021-05-13 16:11:22 +02:00
|
|
|
private ContextMapper contextMapper;
|
|
|
|
|
|
|
|
@BeforeEach
|
|
|
|
public void setUp() {
|
|
|
|
contextMapper = new ContextMapper();
|
|
|
|
}
|
|
|
|
|
2020-05-04 11:51:17 +02:00
|
|
|
@Test
|
2021-05-13 16:11:22 +02:00
|
|
|
public void testPreBuiltRecordTransformation() throws IOException, TransformerException {
|
2021-07-28 10:23:00 +02:00
|
|
|
final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
|
2021-05-13 16:11:22 +02:00
|
|
|
|
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testPublicationRecordTransformation() throws IOException, TransformerException {
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
|
|
|
XmlConverterJob.schemaLocation);
|
2021-05-13 16:11:22 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final Publication p = load("publication.json", Publication.class);
|
|
|
|
final Project pj = load("project.json", Project.class);
|
|
|
|
final Relation rel = load("relToValidatedProject.json", Relation.class);
|
2021-05-13 16:11:22 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final JoinedEntity je = new JoinedEntity<>(p);
|
2021-05-13 16:11:22 +02:00
|
|
|
je
|
|
|
|
.setLinks(
|
|
|
|
Lists
|
|
|
|
.newArrayList(
|
|
|
|
new RelatedEntityWrapper(rel,
|
|
|
|
CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class))));
|
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final String record = xmlRecordFactory.build(je);
|
2021-05-13 16:11:22 +02:00
|
|
|
|
|
|
|
assertNotNull(record);
|
|
|
|
|
|
|
|
testRecordTransformation(record);
|
2021-09-24 17:11:56 +02:00
|
|
|
}
|
|
|
|
|
2023-12-12 11:21:30 +01:00
|
|
|
@Test
|
|
|
|
void testPeerReviewed() throws IOException, TransformerException {
|
|
|
|
|
|
|
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
2023-12-20 15:57:26 +01:00
|
|
|
XmlConverterJob.schemaLocation);
|
2023-12-12 11:21:30 +01:00
|
|
|
|
|
|
|
final Publication p = load("publication.json", Publication.class);
|
|
|
|
|
|
|
|
final JoinedEntity<Publication> je = new JoinedEntity<>(p);
|
|
|
|
final String record = xmlRecordFactory.build(je);
|
|
|
|
assertNotNull(record);
|
|
|
|
SolrInputDocument solrDoc = testRecordTransformation(record);
|
|
|
|
|
|
|
|
assertEquals("true", solrDoc.get("peerreviewed").getValue());
|
|
|
|
}
|
|
|
|
|
2022-09-07 14:40:57 +02:00
|
|
|
@Test
|
|
|
|
public void testRiunet() throws IOException, TransformerException {
|
|
|
|
|
|
|
|
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
|
2022-09-09 15:16:31 +02:00
|
|
|
XmlConverterJob.schemaLocation);
|
2022-09-07 14:40:57 +02:00
|
|
|
|
|
|
|
final Publication p = load("riunet.json", Publication.class);
|
|
|
|
|
|
|
|
final JoinedEntity je = new JoinedEntity<>(p);
|
|
|
|
final String record = xmlRecordFactory.build(je);
|
|
|
|
assertNotNull(record);
|
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
2021-09-24 17:11:56 +02:00
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml"));
|
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureTraining() throws IOException, TransformerException {
|
2021-10-12 08:11:53 +02:00
|
|
|
final String record = IOUtils
|
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml"));
|
2021-09-24 17:11:56 +02:00
|
|
|
testRecordTransformation(record);
|
2021-05-13 16:11:22 +02:00
|
|
|
}
|
|
|
|
|
2021-10-07 17:30:45 +02:00
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureAirQualityCopernicus() throws IOException, TransformerException {
|
2021-10-12 08:11:53 +02:00
|
|
|
final String record = IOUtils
|
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml"));
|
2021-10-07 17:30:45 +02:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureB2SharePlotSw() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-sw.xml"));
|
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureB2SharePlotRelatedORP() throws IOException, TransformerException {
|
2021-10-12 08:11:53 +02:00
|
|
|
final String record = IOUtils
|
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml"));
|
2021-10-07 17:30:45 +02:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
2022-11-23 12:18:44 +01:00
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureSoftwareNotebook() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils
|
2022-12-07 10:45:38 +01:00
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/software-justthink.xml"));
|
2022-11-23 12:18:44 +01:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureSoftwareNotebookClaim() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils
|
2022-12-07 10:45:38 +01:00
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/software-justthink-claim.xml"));
|
2022-11-23 12:18:44 +01:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
2022-11-30 09:57:30 +01:00
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureZenodo7353841() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils
|
2022-12-07 10:45:38 +01:00
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/zenodo7353841.xml"));
|
2022-11-30 09:57:30 +01:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureZenodo7351393() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils
|
2022-12-07 10:45:38 +01:00
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/zenodo7351393.xml"));
|
2022-11-30 09:57:30 +01:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testForEOSCFutureZenodo7351221() throws IOException, TransformerException {
|
|
|
|
final String record = IOUtils
|
2022-12-07 10:45:38 +01:00
|
|
|
.toString(getClass().getResourceAsStream("eosc-future/zenodo7351221.xml"));
|
2022-11-30 09:57:30 +01:00
|
|
|
testRecordTransformation(record);
|
|
|
|
}
|
2022-12-07 10:45:38 +01:00
|
|
|
|
2021-12-23 12:33:53 +01:00
|
|
|
@Test
|
|
|
|
void testDoiUrlNormalization() throws MalformedURLException {
|
|
|
|
|
|
|
|
// TODO add more test examples when needed
|
|
|
|
List<String> urls = Arrays
|
|
|
|
.asList(
|
|
|
|
"https://dx.doi.org/10.1016/j.jas.2019.105013",
|
|
|
|
"http://dx.doi.org/10.13140/rg.2.2.26964.65927",
|
|
|
|
"https://dx.doi.org/10.13140/rg.2.2.26964.65927",
|
|
|
|
"http://dx.doi.org/10.1016/j.jas.2019.105013",
|
|
|
|
"http://hdl.handle.net/2072/369223",
|
|
|
|
"https://doi.org/10.1016/j.jas.2019.105013");
|
|
|
|
|
|
|
|
for (String url : urls) {
|
|
|
|
URL u = new URL(XmlRecordFactory.normalizeDoiUrl(url));
|
|
|
|
if (url.contains(XmlRecordFactory.DOI_ORG_AUTHORITY)) {
|
|
|
|
assertEquals(XmlRecordFactory.HTTPS, u.getProtocol());
|
|
|
|
assertEquals(XmlRecordFactory.DOI_ORG_AUTHORITY, u.getAuthority());
|
|
|
|
} else {
|
|
|
|
assertEquals(url, u.toString());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-12 11:21:30 +01:00
|
|
|
private SolrInputDocument testRecordTransformation(final String record) throws IOException, TransformerException {
|
2021-07-28 10:23:00 +02:00
|
|
|
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
|
|
|
|
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
|
2020-05-04 11:51:17 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
|
2020-05-04 11:51:17 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final Transformer tr = SaxonTransformerFactory.newInstance(transformer);
|
2020-05-04 11:51:17 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
|
2021-05-13 16:11:22 +02:00
|
|
|
|
2023-02-23 11:42:42 +01:00
|
|
|
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory().parseDocument(indexRecordXML);
|
2020-05-04 11:51:17 +02:00
|
|
|
|
2021-05-13 16:11:22 +02:00
|
|
|
final String xmlDoc = ClientUtils.toXML(solrDoc);
|
|
|
|
|
|
|
|
Assertions.assertNotNull(xmlDoc);
|
|
|
|
System.out.println(xmlDoc);
|
2023-12-12 11:21:30 +01:00
|
|
|
|
|
|
|
return solrDoc;
|
2021-05-13 16:11:22 +02:00
|
|
|
}
|
2020-05-04 11:51:17 +02:00
|
|
|
|
2021-07-28 10:23:00 +02:00
|
|
|
private <T> T load(final String fileName, final Class<T> clazz) throws IOException {
|
2021-05-13 16:11:22 +02:00
|
|
|
return XmlRecordFactoryTest.OBJECT_MAPPER
|
|
|
|
.readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz);
|
2020-05-04 11:51:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|