AriadnePlus/dnet-ariadneplus/src/test/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobN...

161 lines
10 KiB
Java

package eu.dnetlib.ariadneplus.workflows.nodes;
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import net.sf.saxon.s9api.SaxonApiException;
import org.apache.commons.io.IOUtils;
import org.junit.Before;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;
import java.io.IOException;
import java.io.InputStream;
import java.time.Duration;
import java.time.LocalDateTime;
/**
* Created by Alessia Bardi on 13/04/2017.
*
* @author Alessia Bardi
*/
public class X3MTransformAriadnePlusJobNodeTest {
/** Job node under test; a new instance is created and configured in {@link #setup()}. */
private X3MTransformAriadnePlusJobNode transformJob;
/**
 * Sample {@code oai:header} fragment passed as the first argument to {@code buildXML} in
 * {@link #testBuildXML()}.
 * <p>
 * It describes a mock record (datasource prefix {@code ariadne_mock}, record identifier 2420500)
 * that is marked invalid because of a {@code dnet:languages} vocabulary error on
 * {@code P72_has_language} with term {@code en}.
 * <p>
 * NOTE(review): the fragment declares only the {@code dri} namespace; the {@code oai} prefix is
 * not bound here, so presumably the enclosing document built by {@code buildXML} declares it —
 * confirm.
 */
private String header = "<oai:header xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n"
+ " <dri:objIdentifier>ariadne_mock::0000023f507999464aa2b78875b7e5d6</dri:objIdentifier>\n"
+ " <dri:recordIdentifier>2420500</dri:recordIdentifier>\n"
+ " <dri:dateOfCollection>2017-04-10T18:44:46.85+02:00</dri:dateOfCollection>\n"
+ " <dri:datasourceprefix>ariadne_mock</dri:datasourceprefix>\n"
+ " <dri:datasourcename>Ariadne Mock</dri:datasourcename>\n"
+ " <dri:dateOfTransformation>2017-04-12T16:31:45.766</dri:dateOfTransformation>\n"
+ " <dri:invalid value=\"true\">\n"
+ " <dri:error vocabularies=\"dnet:languages\" xpath=\"//*[local-name()='P72_has_language']\"\n"
+ " term=\"en\"/>\n"
+ " </dri:invalid>\n"
+ " </oai:header>";
/**
 * Sample {@code oai:about} fragment passed as the last argument to {@code buildXML} in
 * {@link #testBuildXML()}.
 * <p>
 * It wraps an OAI provenance element whose {@code originDescription} is marked
 * {@code altered="true"}, carries a URL-encoded {@code sftp} base URL and has empty
 * {@code identifier}, {@code datestamp} and {@code metadataNamespace} elements.
 * <p>
 * NOTE(review): as with the header fragment, the {@code oai} prefix is not declared inside this
 * string — presumably it is bound by the surrounding document; confirm.
 */
private String footer = "<oai:about xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">\n"
+ " <provenance xmlns=\"http://www.openarchives.org/OAI/2.0/provenance\"\n"
+ " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+ " xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd\">\n"
+ " <originDescription xmlns=\"\" altered=\"true\" harvestDate=\"2017-04-10T18:44:46.85+02:00\">\n"
+ " <baseURL>sftp%3A%2F%2Fariadne2.isti.cnr.it%2F..%2F..%2Fdata%2Ftransform%2Facdm_correct</baseURL>\n"
+ " <identifier/>\n"
+ " <datestamp/>\n"
+ " <metadataNamespace/>\n"
+ " </originDescription>\n"
+ " </provenance>\n"
+ " </oai:about>";
/**
 * Sample RDF/XML payload passed as the metadata argument to {@code buildXML} in
 * {@link #testBuildXML()}.
 * <p>
 * The document declares the {@code crm} and {@code frbr} namespaces (among others) and contains
 * one top-level {@code crm:E73_Information_Object} for dataset 2420500, titled
 * "MID GLEN CROE", with nested subject, language, type, time-span and note properties.
 * Several text values deliberately span two lines, so the literal contains embedded newlines
 * inside element content; the note value contains escaped {@code <br />} markup.
 */
private String metadata=" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"
+ " xmlns:dbpedia-owl=\"http://dbpedia.org/ontology/\"\n"
+ " xmlns:acdm=\"http://registry.ariadne-infrastructure.eu/\"\n"
+ " xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\"\n"
+ " xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"\n"
+ " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n"
+ " xmlns:frbr=\"http://www.cidoc-crm.org/frbroo/\" xmlns:dcterms=\"http://purl.org/dc/terms/\"\n"
+ " xmlns:dcat=\"http://www.w3.org/ns/dcat#\" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n"
+ " xmlns:crm=\"http://www.cidoc-crm.org/cidoc-crm/\"\n"
+ " xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAA\"/>\n"
+ " <crm:E65_Creation rdf:about=\"uuid:AAAB\"/>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/dataset/2420500\">\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/CAIRN\">\n"
+ " <crm:P129_is_about>CAIRN</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/HUT%20CIRCLE\">\n"
+ " <crm:P129_is_about>HUT CIRCLE</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P104_is_subject_to>ADS Terms and Conditions</crm:P104_is_subject_to>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/SHEEPFOLD\">\n"
+ " <crm:P129_is_about>SHEEPFOLD</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/FIELD%20SYSTEM\">\n"
+ " <crm:P129_is_about>FIELD SYSTEM</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P102_has_title>MID GLEN CROE</crm:P102_has_title>\n"
+ " <crm:P165_incorporates>\n"
+ " <crm:E33_Linguistic_Object rdf:about=\"uuid:AAAG\">\n"
+ " <crm:P72_has_language>en</crm:P72_has_language>\n"
+ " </crm:E33_Linguistic_Object>\n"
+ " </crm:P165_incorporates>\n"
+ " <crm:P67_refers_to>\n"
+ " <crm:E1_CRM_Entity rdf:about=\"uuid:AAAH\">\n"
+ " <crm:P2_has_type>Sites and monuments databases or\n"
+ " inventories</crm:P2_has_type>\n"
+ " </crm:E1_CRM_Entity>\n"
+ " </crm:P67_refers_to>\n"
+ " <crm:P93i_was_taken_out_of_existence_by>\n"
+ " <crm:E6_Destruction rdf:about=\"uuid:AAAE\">\n"
+ " <crm:P4_has_time-span>\n"
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAF\">\n"
+ " <crm:P81_ongoing_throughout>2013-12-09\n"
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n"
+ " </crm:E52_Time-Span>\n"
+ " </crm:P4_has_time-span>\n"
+ " </crm:E6_Destruction>\n"
+ " </crm:P93i_was_taken_out_of_existence_by>\n"
+ " <crm:P94i_was_created_by>\n"
+ " <frbr:F30_Publication_Event rdf:about=\"uuid:AAAC\">\n"
+ " <crm:P4_has_time-span>\n"
+ " <crm:E52_Time-Span rdf:about=\"uuid:AAAD\">\n"
+ " <crm:P81_ongoing_throughout>2013-12-09\n"
+ " 00:00:00.0</crm:P81_ongoing_throughout>\n"
+ " </crm:E52_Time-Span>\n"
+ " </crm:P4_has_time-span>\n"
+ " </frbr:F30_Publication_Event>\n"
+ " </crm:P94i_was_created_by>\n"
+ " <crm:P129_is_about>\n"
+ " <crm:E73_Information_Object\n"
+ " rdf:about=\"http://registry.ariadne-infrastructure.eu/subject/BUILDING\">\n"
+ " <crm:P129_is_about>BUILDING</crm:P129_is_about>\n"
+ " </crm:E73_Information_Object>\n"
+ " </crm:P129_is_about>\n"
+ " <crm:P1_is_identified_by>2420500</crm:P1_is_identified_by>\n"
+ " <crm:P106i_forms_part_of>http://registry.ariadne-infrastructure.eu/collection/22721290</crm:P106i_forms_part_of>\n"
+ " <crm:P3_has_note>Multiple instances of: SHEEPFOLD&lt;br /&gt;&lt;br /&gt;Multiple\n"
+ " instances of: BUILDING&lt;br /&gt;Multiple instances of: FIELD SYSTEM&lt;br\n"
+ " /&gt;Possible instance of: CAIRN&lt;br /&gt;Multiple instances of: HUT\n"
+ " CIRCLE&lt;br /&gt;Possible instance of: HUT CIRCLE</crm:P3_has_note>\n"
+ " </crm:E73_Information_Object>\n"
+ " </rdf:RDF>";
/**
 * Test fixture: builds a new job node, hands it a newly created {@link SaxonHelper} and asks
 * the node to prepare its XPath selectors so the tests can use them.
 *
 * @throws SaxonApiException if one of the Saxon-backed setup steps fails
 */
@Before
public void setup() throws SaxonApiException {
    transformJob = new X3MTransformAriadnePlusJobNode();
    // The helper is not needed afterwards, so it is created in place.
    transformJob.setSaxonHelper(new SaxonHelper());
    transformJob.prepareXpathSelectors();
}
/**
 * Smoke test for {@code buildXML}: calls it with the sample header, the current local
 * date-time rendered as a string, the sample RDF metadata and the sample footer, then prints
 * how long the call took (in milliseconds) followed by the returned XML.
 * <p>
 * The elapsed time is taken from {@link System#nanoTime()} rather than from two
 * {@link LocalDateTime#now()} readings: local date-times are zone-less wall-clock values, so a
 * DST switch or a system clock adjustment between the readings would yield a wrong (possibly
 * negative) duration.
 * <p>
 * NOTE(review): the test only fails when {@code buildXML} throws; the returned document is
 * printed but not asserted.
 */
@Test
public void testBuildXML() {
    // Wall-clock value is still what gets passed to buildXML, exactly as before.
    final LocalDateTime now = LocalDateTime.now();
    final String timestamp = now.toString();

    // Monotonic clock for the measurement itself.
    final long startNanos = System.nanoTime();
    final String res = transformJob.buildXML(header, timestamp, metadata, footer);
    final long elapsedNanos = System.nanoTime() - startNanos;

    System.out.println("Building XML took:" + Duration.ofNanos(elapsedNanos).toMillis());
    System.out.println(res);
}
/**
 * Smoke test for {@code extractFromRecord}: reads the {@code dime-full.xml} sample from the
 * classpath, applies the job's metadata XPath selector to it and prints whatever comes back.
 * The extracted text is not asserted, so the test only fails if an exception is thrown.
 *
 * @throws IOException       if the sample record cannot be read
 * @throws SaxonApiException declared because the extraction relies on Saxon
 */
@Test
public void testGetMetadataBlock() throws IOException, SaxonApiException {
    final String sampleRecord = getString("/eu/dnetlib/x3m/dime-full.xml");
    final String metadataBlock =
            transformJob.extractFromRecord(sampleRecord, transformJob.getXpathSelectorMetadata());
    System.out.println(metadataBlock);
}
/**
 * Reads a classpath resource completely and returns its content decoded as UTF-8.
 *
 * @param classpath location of the resource, resolved through {@link ClassPathResource}
 * @return the full content of the resource as a string
 * @throws IOException if the resource cannot be opened or read
 */
private String getString(final String classpath) throws IOException {
    final ClassPathResource resource = new ClassPathResource(classpath);
    // IOUtils.toString leaves the stream open, so it is closed here to avoid leaking the handle.
    try (InputStream in = resource.getInputStream()) {
        return IOUtils.toString(in, "UTF-8");
    }
}
}