mapping loaded only once now
This commit is contained in:
parent
a1843bdc7c
commit
5a68ab07b9
|
@ -0,0 +1,87 @@
|
||||||
|
package eu.dnetlib.ariadneplus.workflows.nodes;
|
||||||
|
|
||||||
|
import gr.forth.ics.isl.x3ml.X3MLEngineFactory;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public class ApplyX3MMappingFunction implements Function<String, String> {
|
||||||
|
|
||||||
|
private static final Log log = LogFactory.getLog(ApplyX3MMappingFunction.class);
|
||||||
|
private String generatorPolicy;
|
||||||
|
private boolean verboseLogging;
|
||||||
|
private URL mappingUrl;
|
||||||
|
private byte[] mappingBytes;
|
||||||
|
|
||||||
|
public ApplyX3MMappingFunction(final URL mappingUrl, final String generatorPolicy, final boolean verboseLogging) {
|
||||||
|
this.mappingUrl = mappingUrl;
|
||||||
|
this.generatorPolicy = generatorPolicy;
|
||||||
|
this.verboseLogging = verboseLogging;
|
||||||
|
|
||||||
|
try {
|
||||||
|
this.mappingBytes = IOUtils.toByteArray(mappingUrl.openStream());
|
||||||
|
log.info("Loaded mapping file from: "+mappingUrl.toString());
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("Saving mappings in bytes array: "+e.getMessage()+"\n");
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String apply(final String metadata) {
|
||||||
|
try (
|
||||||
|
InputStream policyStream = getStream(generatorPolicy);
|
||||||
|
InputStream metadataStream = getStream(metadata);
|
||||||
|
final ByteArrayOutputStream os = new ByteArrayOutputStream()){
|
||||||
|
|
||||||
|
X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory(/*mappingUrl,*/ policyStream);
|
||||||
|
|
||||||
|
x3mEngineFactory.withInput(metadataStream).withOutput(os, X3MLEngineFactory.OutputFormat.RDF_XML_PLAIN);
|
||||||
|
x3mEngineFactory.execute();
|
||||||
|
return new String(os.toByteArray());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error transforming record: "+e.getMessage()+"\n"+metadata);
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private InputStream getStream(String s) throws IOException {
|
||||||
|
if (StringUtils.isNotBlank(s)) {
|
||||||
|
return IOUtils.toInputStream(s, "UTF-8");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private X3MLEngineFactory getConfiguredX3MEngineFactory(/*URL mappingUrl,*/ InputStream policy) {
|
||||||
|
log.debug("Loading mapping from url: " + mappingUrl);
|
||||||
|
|
||||||
|
// X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(mappingUrl);
|
||||||
|
X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(new ByteArrayInputStream(this.mappingBytes));
|
||||||
|
if (policy != null) {
|
||||||
|
x3mEngineFactory.withGeneratorPolicy(policy);
|
||||||
|
}
|
||||||
|
if (verboseLogging)
|
||||||
|
x3mEngineFactory.withVerboseLogging();
|
||||||
|
//to enable real UUID
|
||||||
|
x3mEngineFactory.withUuidSize(0);
|
||||||
|
return x3mEngineFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public URL getMappingUrl() {
|
||||||
|
return mappingUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMappingUrl(URL mappingUrl) {
|
||||||
|
this.mappingUrl = mappingUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -16,7 +16,6 @@ import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
|
||||||
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
|
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
|
||||||
import eu.dnetlib.msro.workflows.graph.Arc;
|
import eu.dnetlib.msro.workflows.graph.Arc;
|
||||||
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
|
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
|
||||||
import eu.dnetlib.msro.workflows.nodes.transform.ApplyX3Mapping;
|
|
||||||
import eu.dnetlib.msro.workflows.procs.Env;
|
import eu.dnetlib.msro.workflows.procs.Env;
|
||||||
import eu.dnetlib.rmi.common.ResultSet;
|
import eu.dnetlib.rmi.common.ResultSet;
|
||||||
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
||||||
|
@ -79,14 +78,14 @@ public class X3MTransformAriadnePlusJobNode extends AsyncJobNode {
|
||||||
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
|
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
|
||||||
prepareXpathSelectors();
|
prepareXpathSelectors();
|
||||||
|
|
||||||
|
ApplyX3MMappingFunction mappingFunction = new ApplyX3MMappingFunction(mappingURL, policy, verboseLogging);
|
||||||
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> {
|
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> {
|
||||||
//JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform?
|
//JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform?
|
||||||
if(log.isDebugEnabled()) {
|
if(log.isDebugEnabled()) {
|
||||||
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
|
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
|
||||||
log.debug("Transforming record objIdentifier: " + objIdentifier);
|
log.debug("Transforming record objIdentifier: " + objIdentifier);
|
||||||
}
|
}
|
||||||
ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
|
// ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
|
||||||
|
|
||||||
String toTransform = record;
|
String toTransform = record;
|
||||||
Instant startExtraction = Instant.now();
|
Instant startExtraction = Instant.now();
|
||||||
|
|
Loading…
Reference in New Issue