diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/ApplyX3MMappingFunction.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/ApplyX3MMappingFunction.java new file mode 100644 index 0000000..adb9f5e --- /dev/null +++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/ApplyX3MMappingFunction.java @@ -0,0 +1,87 @@ +package eu.dnetlib.ariadneplus.workflows.nodes; + +import gr.forth.ics.isl.x3ml.X3MLEngineFactory; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.function.Function; + +public class ApplyX3MMappingFunction implements Function { + + private static final Log log = LogFactory.getLog(ApplyX3MMappingFunction.class); + private String generatorPolicy; + private boolean verboseLogging; + private URL mappingUrl; + private byte[] mappingBytes; + + public ApplyX3MMappingFunction(final URL mappingUrl, final String generatorPolicy, final boolean verboseLogging) { + this.mappingUrl = mappingUrl; + this.generatorPolicy = generatorPolicy; + this.verboseLogging = verboseLogging; + + try { + this.mappingBytes = IOUtils.toByteArray(mappingUrl.openStream()); + log.info("Loaded mapping file from: "+mappingUrl.toString()); + } catch (IOException e) { + log.error("Saving mappings in bytes array: "+e.getMessage()+"\n"); + throw new RuntimeException(e); + } + } + + @Override + public String apply(final String metadata) { + try ( + InputStream policyStream = getStream(generatorPolicy); + InputStream metadataStream = getStream(metadata); + final ByteArrayOutputStream os = new ByteArrayOutputStream()){ + + X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory(/*mappingUrl,*/ policyStream); + + x3mEngineFactory.withInput(metadataStream).withOutput(os, X3MLEngineFactory.OutputFormat.RDF_XML_PLAIN); + x3mEngineFactory.execute(); + return new String(os.toByteArray()); + } catch (Exception e) { + log.error("Error transforming record: "+e.getMessage()+"\n"+metadata); + throw new RuntimeException(e); + } + } + + private InputStream getStream(String s) throws IOException { + if (StringUtils.isNotBlank(s)) { + return IOUtils.toInputStream(s, "UTF-8"); + } + return null; + } + + private X3MLEngineFactory getConfiguredX3MEngineFactory(/*URL mappingUrl,*/ InputStream policy) { + log.debug("Loading mapping from url: " + mappingUrl); + +// X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(mappingUrl); + X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(new ByteArrayInputStream(this.mappingBytes)); + if (policy != null) { + x3mEngineFactory.withGeneratorPolicy(policy); + } + if (verboseLogging) + x3mEngineFactory.withVerboseLogging(); + //to enable real UUID + x3mEngineFactory.withUuidSize(0); + return x3mEngineFactory; + } + + public URL getMappingUrl() { + return mappingUrl; + } + + public void setMappingUrl(URL mappingUrl) { + this.mappingUrl = mappingUrl; + } + + +} diff --git a/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobNode.java b/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobNode.java index 3143c5a..c6bd89c 100644 --- a/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobNode.java +++ b/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobNode.java @@ -16,7 +16,6 @@ import eu.dnetlib.miscutils.functional.xml.SaxonHelper; import eu.dnetlib.miscutils.functional.xml.XMLIndenter; import eu.dnetlib.msro.workflows.graph.Arc; import eu.dnetlib.msro.workflows.nodes.AsyncJobNode; -import eu.dnetlib.msro.workflows.nodes.transform.ApplyX3Mapping; import eu.dnetlib.msro.workflows.procs.Env; import eu.dnetlib.rmi.common.ResultSet; import eu.dnetlib.rmi.enabling.ISLookUpException; @@ -79,14 +78,14 @@ public class X3MTransformAriadnePlusJobNode extends AsyncJobNode { if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); } prepareXpathSelectors(); - + ApplyX3MMappingFunction mappingFunction = new ApplyX3MMappingFunction(mappingURL, policy, verboseLogging); final ResultSet rsOut = this.resultSetFactory.map(rsIn, String.class, record -> { //JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform? if(log.isDebugEnabled()) { String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier); log.debug("Transforming record objIdentifier: " + objIdentifier); } - ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging); +// ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging); String toTransform = record; Instant startExtraction = Instant.now();