You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
271 lines
9.8 KiB
Java
271 lines
9.8 KiB
Java
package eu.dnetlib.ariadneplus.workflows.nodes;
|
|
|
|
import java.net.URL;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import com.google.common.collect.Maps;
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
import eu.dnetlib.miscutils.functional.xml.SaxonHelper;
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.rmi.common.ResultSet;
import eu.dnetlib.rmi.enabling.ISLookUpException;
import eu.dnetlib.rmi.enabling.ISLookUpService;
import eu.dnetlib.rmi.manager.MSROException;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer.Property;
import net.sf.saxon.s9api.XPathSelector;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
|
|
|
|
public class X3MTransformAriadnePlusJobNode extends AsyncJobNode {
|
|
|
|
private static final Log log = LogFactory.getLog(X3MTransformAriadnePlusJobNode.class);
|
|
private static final String OAI_NAMESPACE_URI = "http://www.openarchives.org/OAI/2.0/";
|
|
private static final String DRI_NAMESPACE_URI = "http://www.driver-repository.eu/namespace/dri";
|
|
|
|
private String inputEprParam;
|
|
private String outputEprParam;
|
|
|
|
private String mappingPolicyProfileId;
|
|
|
|
private String mappingUrl;
|
|
|
|
private boolean verboseLogging;
|
|
|
|
private XPathSelector xpathSelectorMetadata;
|
|
private XPathSelector xpathSelectorHeader;
|
|
private XPathSelector xpathSelectorFooter;
|
|
private XPathSelector xpathSelectorObjIdentifier;
|
|
|
|
/**
|
|
* true to pass the full record to X3m-engine. False to pass only what's in the metadata section.
|
|
**/
|
|
private boolean passFullRecord;
|
|
|
|
|
|
@Autowired
|
|
private ResultSetFactory resultSetFactory;
|
|
@Autowired
|
|
private UniqueServiceLocator serviceLocator;
|
|
@Autowired
|
|
private SaxonHelper saxonHelper;
|
|
|
|
@Override
|
|
protected String execute(final Env env) throws Exception {
|
|
log.info("Mapping Policy profile id read from node configuration: " + mappingPolicyProfileId);
|
|
log.info("Mapping url read from node configuration: " + mappingUrl);
|
|
final URL mappingURL = new URL(mappingUrl);
|
|
final String policy = getProfileCode(mappingPolicyProfileId);
|
|
|
|
LocalDateTime now = LocalDateTime.now();
|
|
final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
|
|
if ((rsIn == null)) { throw new MSROException("InputEprParam (" + this.inputEprParam + ") not found in ENV"); }
|
|
prepareXpathSelectors();
|
|
|
|
ApplyX3MMappingFunction mappingFunction = new ApplyX3MMappingFunction(mappingURL, policy, verboseLogging);
|
|
final ResultSet<String> rsOut = this.resultSetFactory.map(rsIn, String.class, record -> {
|
|
//JUST FOR DEBUGGING THE TIMEOUT OF THE MONGO CURSOR: is there a metadata record that it is really slow to transform?
|
|
if(log.isDebugEnabled()) {
|
|
String objIdentifier = extractFromRecord(record, xpathSelectorObjIdentifier);
|
|
log.debug("Transforming record objIdentifier: " + objIdentifier);
|
|
}
|
|
// ApplyX3Mapping mappingFunction = new ApplyX3Mapping(mappingURL, policy, verboseLogging);
|
|
|
|
String toTransform = record;
|
|
Instant startExtraction = Instant.now();
|
|
if(!isPassFullRecord()) {
|
|
log.debug("Extracting XML from the metadata block");
|
|
toTransform = extractFromRecord(record, xpathSelectorMetadata);
|
|
}
|
|
|
|
String header = extractFromRecord(record, xpathSelectorHeader);
|
|
String provenanceFooter = extractFromRecord(record, xpathSelectorFooter);
|
|
Instant endExtraction = Instant.now();
|
|
|
|
Instant startTransform = Instant.now();
|
|
String transformed = mappingFunction.apply(toTransform);
|
|
Instant endTransform = Instant.now();
|
|
|
|
if(log.isDebugEnabled()){
|
|
log.debug("Extraction took "+ Duration.between(startExtraction, endExtraction).toMillis()+" ms");
|
|
log.debug("Transformation took "+ Duration.between(startTransform, endTransform).toMillis()+" ms");
|
|
log.debug("Total mapping time: "+Duration.between(startExtraction, endTransform).toMillis()+" ms");
|
|
}
|
|
String res = buildXML(header, now.toString(), transformed, provenanceFooter);
|
|
if(log.isDebugEnabled()) {
|
|
log.debug("SOURCE:\n"+toTransform);
|
|
log.debug("TRANFORMED:\n"+res);
|
|
}
|
|
return res;
|
|
});
|
|
|
|
env.setAttribute(this.outputEprParam, rsOut);
|
|
|
|
return Arc.DEFAULT_ARC;
|
|
}
|
|
|
|
private String[] getMappingsCode(String[] mappingIds) throws ISLookUpException {
|
|
String[] mappings = new String[mappingIds.length];
|
|
for(int i =0; i < mappingIds.length; i++){
|
|
mappings[i] = getProfileCode(mappingIds[i]);
|
|
}
|
|
return mappings;
|
|
}
|
|
|
|
protected String buildXML(final String header, final String transformationDate, final String metadata, final String provenance) {
|
|
Instant start = Instant.now();
|
|
try {
|
|
XMLIndenter xmlHelper = new XMLIndenter();
|
|
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
|
|
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
|
|
// root elements
|
|
Document doc = docBuilder.newDocument();
|
|
Element rootElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:record");
|
|
Element headerElem = docBuilder.parse(IOUtils.toInputStream(header, "UTF-8")).getDocumentElement();
|
|
Node headerNode = doc.importNode(headerElem, true);
|
|
rootElement.appendChild(headerNode);
|
|
Element transDate = doc.createElementNS(DRI_NAMESPACE_URI, "dri:dateOfTransformation");
|
|
transDate.setTextContent(transformationDate);
|
|
headerNode.appendChild(transDate);
|
|
Element metadataElement = doc.createElementNS(OAI_NAMESPACE_URI, "oai:metadata");
|
|
Element contentElem = docBuilder.parse(IOUtils.toInputStream(metadata, "UTF-8")).getDocumentElement();
|
|
Node contentNode = doc.importNode(contentElem, true);
|
|
metadataElement.appendChild(contentNode);
|
|
rootElement.appendChild(metadataElement);
|
|
Element aboutElem = docBuilder.parse(IOUtils.toInputStream(provenance, "UTF-8")).getDocumentElement();
|
|
Node aboutNode = doc.importNode(aboutElem, true);
|
|
rootElement.appendChild(aboutNode);
|
|
|
|
doc.appendChild(rootElement);
|
|
Instant startIndent = Instant.now();
|
|
String res = xmlHelper.indent(doc);
|
|
Instant end = Instant.now();
|
|
if(log.isDebugEnabled()){
|
|
log.debug("XML built in "+ Duration.between(start, end).toMillis()+" ms");
|
|
log.debug("Serialization with indent took "+ Duration.between(startIndent, end).toMillis()+" ms");
|
|
}
|
|
return res;
|
|
} catch (Exception e) {
|
|
throw new RuntimeException("Cannot build the transformed xml file", e);
|
|
}
|
|
|
|
}
|
|
|
|
private void prepareXpathSelectors() throws SaxonApiException {
|
|
Map<String, String> namespaces = Maps.newHashMap();
|
|
namespaces.put("oai", OAI_NAMESPACE_URI);
|
|
namespaces.put("dri", DRI_NAMESPACE_URI);
|
|
xpathSelectorHeader = this.saxonHelper.help().prepareXPathSelector("//oai:header", namespaces);
|
|
xpathSelectorMetadata = this.saxonHelper.help().prepareXPathSelector("//oai:metadata/*", namespaces);
|
|
xpathSelectorFooter = this.saxonHelper.help().prepareXPathSelector("//oai:about", namespaces);
|
|
xpathSelectorObjIdentifier = this.saxonHelper.help().prepareXPathSelector("//oai:header/*[local-name()='objIdentifier']/text()", namespaces);
|
|
|
|
}
|
|
|
|
private String extractFromRecord(final String record, final XPathSelector xPathSelector) {
|
|
try {
|
|
return this.saxonHelper.help().setSerializerProperty(Property.OMIT_XML_DECLARATION, "yes").evaluateSingleAsString(record, xPathSelector);
|
|
} catch (SaxonApiException e) {
|
|
throw new RuntimeException("Cannot extract content ", e);
|
|
}
|
|
}
|
|
|
|
private String getProfileCode(String profId) throws ISLookUpException {
|
|
if (StringUtils.isBlank(profId)) return null;
|
|
String xquery = "string(collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" +
|
|
profId + "']//CODE)";
|
|
List<String> res = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xquery);
|
|
if (res.isEmpty() || StringUtils.isBlank(res.get(0))) {
|
|
throw new RuntimeException("Can't find transformation rule CODE for " + profId);
|
|
}
|
|
return res.get(0);
|
|
}
|
|
|
|
public String getInputEprParam() {
|
|
return this.inputEprParam;
|
|
}
|
|
|
|
public void setInputEprParam(final String inputEprParam) {
|
|
this.inputEprParam = inputEprParam;
|
|
}
|
|
|
|
public String getOutputEprParam() {
|
|
return this.outputEprParam;
|
|
}
|
|
|
|
public void setOutputEprParam(final String outputEprParam) {
|
|
this.outputEprParam = outputEprParam;
|
|
}
|
|
|
|
public String getMappingPolicyProfileId() {
|
|
return mappingPolicyProfileId;
|
|
}
|
|
|
|
public void setMappingPolicyProfileId(final String mappingPolicyProfileId) {
|
|
this.mappingPolicyProfileId = mappingPolicyProfileId;
|
|
}
|
|
|
|
public boolean isVerboseLogging() {
|
|
return verboseLogging;
|
|
}
|
|
|
|
public void setVerboseLogging(final boolean verboseLogging) {
|
|
this.verboseLogging = verboseLogging;
|
|
}
|
|
|
|
public ResultSetFactory getResultSetFactory() {
|
|
return resultSetFactory;
|
|
}
|
|
|
|
public void setResultSetFactory(final ResultSetFactory resultSetFactory) {
|
|
this.resultSetFactory = resultSetFactory;
|
|
}
|
|
|
|
public UniqueServiceLocator getServiceLocator() {
|
|
return serviceLocator;
|
|
}
|
|
|
|
public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
|
|
this.serviceLocator = serviceLocator;
|
|
}
|
|
|
|
public boolean isPassFullRecord() {
|
|
return passFullRecord;
|
|
}
|
|
|
|
public void setPassFullRecord(final boolean passFullRecord) {
|
|
this.passFullRecord = passFullRecord;
|
|
}
|
|
|
|
public String getMappingUrl() {
|
|
return mappingUrl;
|
|
}
|
|
|
|
public void setMappingUrl(String mappingUrl) {
|
|
this.mappingUrl = mappingUrl;
|
|
}
|
|
|
|
private String[] getMappingsFromUrl(String url) {
|
|
String[] mappings = new String[1];
|
|
|
|
return mappings;
|
|
}
|
|
|
|
}
|