Browse Source

mapping loaded only once now

new_es_mapping
Enrico Ottonello 10 months ago
parent
commit
5a68ab07b9
  1. 87
      dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/ApplyX3MMappingFunction.java
  2. 5
      dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobNode.java

87
dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/ApplyX3MMappingFunction.java

@ -0,0 +1,87 @@
package eu.dnetlib.ariadneplus.workflows.nodes;
import gr.forth.ics.isl.x3ml.X3MLEngineFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.function.Function;
public class ApplyX3MMappingFunction implements Function<String, String> {
private static final Log log = LogFactory.getLog(ApplyX3MMappingFunction.class);
private String generatorPolicy;
private boolean verboseLogging;
private URL mappingUrl;
private byte[] mappingBytes;
public ApplyX3MMappingFunction(final URL mappingUrl, final String generatorPolicy, final boolean verboseLogging) {
this.mappingUrl = mappingUrl;
this.generatorPolicy = generatorPolicy;
this.verboseLogging = verboseLogging;
try {
this.mappingBytes = IOUtils.toByteArray(mappingUrl.openStream());
log.info("Loaded mapping file from: "+mappingUrl.toString());
} catch (IOException e) {
log.error("Saving mappings in bytes array: "+e.getMessage()+"\n");
throw new RuntimeException(e);
}
}
@Override
public String apply(final String metadata) {
try (
InputStream policyStream = getStream(generatorPolicy);
InputStream metadataStream = getStream(metadata);
final ByteArrayOutputStream os = new ByteArrayOutputStream()){
X3MLEngineFactory x3mEngineFactory = getConfiguredX3MEngineFactory(/*mappingUrl,*/ policyStream);
x3mEngineFactory.withInput(metadataStream).withOutput(os, X3MLEngineFactory.OutputFormat.RDF_XML_PLAIN);
x3mEngineFactory.execute();
return new String(os.toByteArray());
} catch (Exception e) {
log.error("Error transforming record: "+e.getMessage()+"\n"+metadata);
throw new RuntimeException(e);
}
}
private InputStream getStream(String s) throws IOException {
if (StringUtils.isNotBlank(s)) {
return IOUtils.toInputStream(s, "UTF-8");
}
return null;
}
private X3MLEngineFactory getConfiguredX3MEngineFactory(/*URL mappingUrl,*/ InputStream policy) {
log.debug("Loading mapping from url: " + mappingUrl);
// X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(mappingUrl);
X3MLEngineFactory x3mEngineFactory = X3MLEngineFactory.create().withMappings(new ByteArrayInputStream(this.mappingBytes));
if (policy != null) {
x3mEngineFactory.withGeneratorPolicy(policy);
}
if (verboseLogging)
x3mEngineFactory.withVerboseLogging();
//to enable real UUID
x3mEngineFactory.withUuidSize(0);
return x3mEngineFactory;
}
public URL getMappingUrl() {
return mappingUrl;
}
public void setMappingUrl(URL mappingUrl) {
this.mappingUrl = mappingUrl;
}
}

5
dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobNode.java

@ -16,7 +16,6 @@ import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.nodes.transform.ApplyX3Mapping;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.rmi.common.ResultSet;
import eu.dnetlib.rmi.enabling.ISLookUpException;
@ -79,14 +78,14 @@ public class X3MTransformAriadnePlusJobNode extends AsyncJobNode {
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
prepareXpathSelectors();
ApplyX3MMappingFunction mappingFunction = new ApplyX3MMappingFunction(mappingURL, policy, verboseLogging);
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> {
//JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform?
if(log.isDebugEnabled()) {
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
log.debug("Transforming record objIdentifier: " + objIdentifier);
}
ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
// ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
String toTransform = record;
Instant startExtraction = Instant.now();

Loading…
Cancel
Save