package eu.dnetlib.ariadneplus.workflows.nodes; import java.net.URL; import java.time.Duration; import java.time.Instant; import java.time.LocalDateTime; import java.util.List; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import com.google.common.collect.Maps; import eu.dnetlib.enabling.locators.UniqueServiceLocator; import eu.dnetlib.enabling.resultset.factory.ResultSetFactory; import eu.dnetlib.miscutils.functional.xml.SaxonHelper; import eu.dnetlib.miscutils.functional.xml.XMLIndenter; import eu.dnetlib.msro.workflows.graph.Arc; import eu.dnetlib.msro.workflows.nodes.AsyncJobNode; import eu.dnetlib.msro.workflows.procs.Env; import eu.dnetlib.rmi.common.ResultSet; import eu.dnetlib.rmi.enabling.ISLookUpException; import eu.dnetlib.rmi.enabling.ISLookUpService; import eu.dnetlib.rmi.manager.MSROException; import net.sf.saxon.s9api.SaxonApiException; import net.sf.saxon.s9api.Serializer.Property; import net.sf.saxon.s9api.XPathSelector; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; public class X3MTransformAriadnePlusJobNode extends AsyncJobNode { private static final Log log = LogFactory.getLog(X3MTransformAriadnePlusJobNode.class); private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/"; private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri"; private String inputEprParam; private String outputEprParam; private String mappingPolicyProfileId; private String mappingUrl; private boolean verboseLogging; private XPathSelector xpathSelectorMetadata; private XPathSelector xpathSelectorHeader; private XPathSelector xpathSelectorFooter; private XPathSelector xpathSelectorObjIdentifier; /** * true to pass the full record to X3m-engine. False to pass only what's in the metadata section. **/ private boolean passFullRecord; @Autowired private ResultSetFactory resultSetFactory; @Autowired private UniqueServiceLocator serviceLocator; @Autowired private SaxonHelper saxonHelper; @Override protected String execute(final Env env) throws Exception { log.info("Mapping Policy profile id read from node configuration: " + mappingPolicyProfileId); log.info("Mapping url read from node configuration: " + mappingUrl); final URL mappingURL = new URL(mappingUrl); final String policy = getProfileCode(mappingPolicyProfileId); LocalDateTime now = LocalDateTime.now(); final ResultSet rsIn = env.getAttribute(this.inputEprParam, ResultSet.class); if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); } prepareXpathSelectors(); ApplyX3MMappingFunction mappingFunction = new ApplyX3MMappingFunction(mappingURL, policy, verboseLogging); final ResultSet rsOut = this.resultSetFactory.map(rsIn, String.class, record -> { //JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform? if(log.isDebugEnabled()) { String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier); log.debug("Transforming record objIdentifier: " + objIdentifier); } // ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging); String toTransform = record; Instant startExtraction = Instant.now(); if(!isPassFullRecord()) { log.debug("Extracting XML from the metadata block"); toTransform = extractFromRecord(record, xpathSelectorMetadata); } String header = extractFromRecord(record, xpathSelectorHeader); String provenanceFooter = extractFromRecord(record, xpathSelectorFooter); Instant endExtraction = Instant.now(); Instant startTransform = Instant.now(); String transformed = mappingFunction.apply(toTransform); Instant endTransform = Instant.now(); if(log.isDebugEnabled()){ log.debug("Extraction took "+ Duration.between(startExtraction, endExtraction).toMillis()+" ms"); log.debug("Transformation took "+ Duration.between(startTransform, endTransform).toMillis()+" ms"); log.debug("Total mapping time: "+Duration.between(startExtraction, endTransform).toMillis()+" ms"); } String res = buildXML(header, now.toString(), transformed, provenanceFooter); if(log.isDebugEnabled()) { log.debug("SOURCE:\n"+toTransform); log.debug("TRANFORMED:\n"+res); } return res; }); env.setAttribute(this.outputEprParam, rsOut); return Arc.DEFAULT_ARC; } private String[] getMappingsCode(String[] mappingIds) throws ISLookUpException { String[] mappings = new String[mappingIds.length]; for(int i =0; i < mappingIds.length; i++){ mappings[i] = getProfileCode(mappingIds[i]); } return mappings; } protected String buildXML(final String header, final String transformationDate, final String metadata, final String provenance) { Instant start = Instant.now(); try { XMLIndenter xmlHelper = new XMLIndenter(); DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); // root elements Document doc = docBuilder.newDocument(); Element rootElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:record"); Element headerElem = docBuilder.parse(IOUtils.toInputStream(header, "UTF-8")).getDocumentElement(); Node headerNode = doc.importNode(headerElem, true); rootElement.appendChild(headerNode); Element transDate = doc.createElementNS(DRI_NAMESPACE_URI, "dri:dateOfTransformation"); transDate.setTextContent(transformationDate); headerNode.appendChild(transDate); Element metadataElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:metadata"); Element contentElem = docBuilder.parse(IOUtils.toInputStream(metadata, "UTF-8")).getDocumentElement(); Node contentNode = doc.importNode(contentElem, true); metadataElement.appendChild(contentNode); rootElement.appendChild(metadataElement); Element aboutElem = docBuilder.parse(IOUtils.toInputStream(provenance, "UTF-8")).getDocumentElement(); Node aboutNode = doc.importNode(aboutElem, true); rootElement.appendChild(aboutNode); doc.appendChild(rootElement); Instant startIndent = Instant.now(); String res = xmlHelper.indent(doc); Instant end = Instant.now(); if(log.isDebugEnabled()){ log.debug("XML built in "+ Duration.between(start, end).toMillis()+" ms"); log.debug("Serialization with indent took "+ Duration.between(startIndent, end).toMillis()+" ms"); } return res; } catch (Exception e) { throw new RuntimeException("Cannot build the transformed xml file", e); } } private void prepareXpathSelectors() throws SaxonApiException { Map namespaces = Maps.newHashMap(); namespaces.put("oai", OAI_NAMESPACE_URI); namespaces.put("dri", DRI_NAMESPACE_URI); xpathSelectorHeader = this.saxonHelper.help().prepareXPathSelector("//oai:header", namespaces); xpathSelectorMetadata = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/*", namespaces); xpathSelectorFooter = this.saxonHelper.help().prepareXPathSelector("//oai:about", namespaces); xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/*[local-name()='objIdentifier']/text()", namespaces); } private String extractFromRecord(final String record, final XPathSelector xPathSelector) { try { return this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector); } catch (SaxonApiException e) { throw new RuntimeException("Cannot extract content ", e); } } private String getProfileCode(String profId) throws ISLookUpException { if (StringUtils.isBlank(profId)) return null; String xquery = "string(collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + profId + "']//CODE)"; List res = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery); if (res.isEmpty() || StringUtils.isBlank(res.get(0))) { throw new RuntimeException("Can't find transformation rule CODE for " + profId); } return res.get(0); } public String getInputEprParam() { return this.inputEprParam; } public void setInputEprParam(final String inputEprParam) { this.inputEprParam = inputEprParam; } public String getOutputEprParam() { return this.outputEprParam; } public void setOutputEprParam(final String outputEprParam) { this.outputEprParam = outputEprParam; } public String getMappingPolicyProfileId() { return mappingPolicyProfileId; } public void setMappingPolicyProfileId(final String mappingPolicyProfileId) { this.mappingPolicyProfileId = mappingPolicyProfileId; } public boolean isVerboseLogging() { return verboseLogging; } public void setVerboseLogging(final boolean verboseLogging) { this.verboseLogging = verboseLogging; } public ResultSetFactory getResultSetFactory() { return resultSetFactory; } public void setResultSetFactory(final ResultSetFactory resultSetFactory) { this.resultSetFactory = resultSetFactory; } public UniqueServiceLocator getServiceLocator() { return serviceLocator; } public void setServiceLocator(final UniqueServiceLocator serviceLocator) { this.serviceLocator = serviceLocator; } public boolean isPassFullRecord() { return passFullRecord; } public void setPassFullRecord(final boolean passFullRecord) { this.passFullRecord = passFullRecord; } public String getMappingUrl() { return mappingUrl; } public void setMappingUrl(String mappingUrl) { this.mappingUrl = mappingUrl; } private String[] getMappingsFromUrl(String url) { String[] mappings = new String[1]; return mappings; } }