You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
AriadnePlus/dnet-ariadneplus/src/main/java/eu/dnetlib/ariadneplus/workflows/nodes/X3MTransformAriadnePlusJobN...

271 lines
9.8 KiB
Java

package eu.dnetlib.ariadneplus.workflows.nodes;
import java.net.URL;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import com.google.common.collect.Maps;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.rmi.common.ResultSet;
import eu.dnetlib.rmi.enabling.ISLookUpException;
import eu.dnetlib.rmi.enabling.ISLookUpService;
import eu.dnetlib.rmi.manager.MSROException;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer.Property;
import net.sf.saxon.s9api.XPathSelector;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
public class X3MTransformAriadnePlusJobNode extends AsyncJobNode {
private static final Log log = LogFactory.getLog(X3MTransformAriadnePlusJobNode.class);
private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/";
private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
private String inputEprParam;
private String outputEprParam;
private String mappingPolicyProfileId;
private String mappingUrl;
private boolean verboseLogging;
private XPathSelector xpathSelectorMetadata;
private XPathSelector xpathSelectorHeader;
private XPathSelector xpathSelectorFooter;
private XPathSelector xpathSelectorObjIdentifier;
/**
* true to pass the full record to X3m-engine. False to pass only what's in the metadata section.
**/
private boolean passFullRecord;
@Autowired
private ResultSetFactory resultSetFactory;
@Autowired
private UniqueServiceLocator serviceLocator;
@Autowired
private SaxonHelper saxonHelper;
@Override
protected String execute(final Env env) throws Exception {
log.info("Mapping Policy profile id read from node configuration: " + mappingPolicyProfileId);
log.info("Mapping url read from node configuration: " + mappingUrl);
final URL mappingURL = new URL(mappingUrl);
final String policy = getProfileCode(mappingPolicyProfileId);
LocalDateTime now = LocalDateTime.now();
final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
prepareXpathSelectors();
ApplyX3MMappingFunction mappingFunction = new ApplyX3MMappingFunction(mappingURL, policy, verboseLogging);
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> {
//JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform?
if(log.isDebugEnabled()) {
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
log.debug("Transforming record objIdentifier: " + objIdentifier);
}
// ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
String toTransform = record;
Instant startExtraction = Instant.now();
if(!isPassFullRecord()) {
log.debug("Extracting XML from the metadata block");
toTransform = extractFromRecord(record, xpathSelectorMetadata);
}
String header = extractFromRecord(record, xpathSelectorHeader);
String provenanceFooter = extractFromRecord(record, xpathSelectorFooter);
Instant endExtraction = Instant.now();
Instant startTransform = Instant.now();
String transformed = mappingFunction.apply(toTransform);
Instant endTransform = Instant.now();
if(log.isDebugEnabled()){
log.debug("Extraction took "+ Duration.between(startExtraction, endExtraction).toMillis()+" ms");
log.debug("Transformation took "+ Duration.between(startTransform, endTransform).toMillis()+" ms");
log.debug("Total mapping time: "+Duration.between(startExtraction, endTransform).toMillis()+" ms");
}
String res = buildXML(header, now.toString(), transformed, provenanceFooter);
if(log.isDebugEnabled()) {
log.debug("SOURCE:\n"+toTransform);
log.debug("TRANFORMED:\n"+res);
}
return res;
});
env.setAttribute(this.outputEprParam, rsOut);
return Arc.DEFAULT_ARC;
}
private String[] getMappingsCode(String[] mappingIds) throws ISLookUpException {
String[] mappings = new String[mappingIds.length];
for(int i =0; i < mappingIds.length; i++){
mappings[i] = getProfileCode(mappingIds[i]);
}
return mappings;
}
protected String buildXML(final String header, final String transformationDate, final String metadata, final String provenance) {
Instant start = Instant.now();
try {
XMLIndenter xmlHelper = new XMLIndenter();
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
// root elements
Document doc = docBuilder.newDocument();
Element rootElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:record");
Element headerElem = docBuilder.parse(IOUtils.toInputStream(header, "UTF-8")).getDocumentElement();
Node headerNode = doc.importNode(headerElem, true);
rootElement.appendChild(headerNode);
Element transDate = doc.createElementNS(DRI_NAMESPACE_URI, "dri:dateOfTransformation");
transDate.setTextContent(transformationDate);
headerNode.appendChild(transDate);
Element metadataElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:metadata");
Element contentElem = docBuilder.parse(IOUtils.toInputStream(metadata, "UTF-8")).getDocumentElement();
Node contentNode = doc.importNode(contentElem, true);
metadataElement.appendChild(contentNode);
rootElement.appendChild(metadataElement);
Element aboutElem = docBuilder.parse(IOUtils.toInputStream(provenance, "UTF-8")).getDocumentElement();
Node aboutNode = doc.importNode(aboutElem, true);
rootElement.appendChild(aboutNode);
doc.appendChild(rootElement);
Instant startIndent = Instant.now();
String res = xmlHelper.indent(doc);
Instant end = Instant.now();
if(log.isDebugEnabled()){
log.debug("XML built in "+ Duration.between(start, end).toMillis()+" ms");
log.debug("Serialization with indent took "+ Duration.between(startIndent, end).toMillis()+" ms");
}
return res;
} catch (Exception e) {
throw new RuntimeException("Cannot build the transformed xml file", e);
}
}
private void prepareXpathSelectors() throws SaxonApiException {
Map<String, String> namespaces = Maps.newHashMap();
namespaces.put("oai", OAI_NAMESPACE_URI);
namespaces.put("dri", DRI_NAMESPACE_URI);
xpathSelectorHeader = this.saxonHelper.help().prepareXPathSelector("//oai:header", namespaces);
xpathSelectorMetadata = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/*", namespaces);
xpathSelectorFooter = this.saxonHelper.help().prepareXPathSelector("//oai:about", namespaces);
xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/*[local-name()='objIdentifier']/text()", namespaces);
}
private String extractFromRecord(final String record, final XPathSelector xPathSelector) {
try {
return this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
} catch (SaxonApiException e) {
throw new RuntimeException("Cannot extract content ", e);
}
}
private String getProfileCode(String profId) throws ISLookUpException {
if (StringUtils.isBlank(profId)) return null;
String xquery = "string(collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" +
profId + "']//CODE)";
List<String> res = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery);
if (res.isEmpty() || StringUtils.isBlank(res.get(0))) {
throw new RuntimeException("Can't find transformation rule CODE for " + profId);
}
return res.get(0);
}
public String getInputEprParam() {
return this.inputEprParam;
}
public void setInputEprParam(final String inputEprParam) {
this.inputEprParam = inputEprParam;
}
public String getOutputEprParam() {
return this.outputEprParam;
}
public void setOutputEprParam(final String outputEprParam) {
this.outputEprParam = outputEprParam;
}
public String getMappingPolicyProfileId() {
return mappingPolicyProfileId;
}
public void setMappingPolicyProfileId(final String mappingPolicyProfileId) {
this.mappingPolicyProfileId = mappingPolicyProfileId;
}
public boolean isVerboseLogging() {
return verboseLogging;
}
public void setVerboseLogging(final boolean verboseLogging) {
this.verboseLogging = verboseLogging;
}
public ResultSetFactory getResultSetFactory() {
return resultSetFactory;
}
public void setResultSetFactory(final ResultSetFactory resultSetFactory) {
this.resultSetFactory = resultSetFactory;
}
public UniqueServiceLocator getServiceLocator() {
return serviceLocator;
}
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
this.serviceLocator = serviceLocator;
}
public boolean isPassFullRecord() {
return passFullRecord;
}
public void setPassFullRecord(final boolean passFullRecord) {
this.passFullRecord = passFullRecord;
}
public String getMappingUrl() {
return mappingUrl;
}
public void setMappingUrl(String mappingUrl) {
this.mappingUrl = mappingUrl;
}
private String[] getMappingsFromUrl(String url) {
String[] mappings = new String[1];
return mappings;
}
}