// Source: dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImpl.java
// (repository viewer metadata: 354 lines, 12 KiB, Java)

package eu.dnetlib.data.collective.transformation.engine.core;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.FeatureKeys;
import net.sf.saxon.instruct.TerminationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.springframework.core.io.Resource;
import eu.dnetlib.data.collective.transformation.TransformationException;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaInspector;
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
import eu.dnetlib.data.collective.transformation.utils.NamespaceContextImpl;
/**
* @author jochen
*
*/
public class TransformationImpl implements
ITransformation {
private static final String rootElement = "record";
private final Log log = LogFactory.getLog(TransformationImpl.class);
private Document xslDoc;
private SAXReader reader = new SAXReader();
private Transformer transformer;
private Transformer transformerFailed;
protected RuleLanguageParser ruleLanguageParser;
private StylesheetBuilder stylesheetBuilder;
// cache static transformation results, valid for one transformation job
private Map<String, String> staticResults = new LinkedHashMap<String, String>();
private Map<String, String> jobConstantMap = new HashMap<String, String>();
@javax.annotation.Resource(name="template")
private Resource template;
private Resource schema;
private Source xsltSyntaxcheckFailed;
/**
* initializes the transformation with the underlying XSL-template
*/
public void init(){
try {
xslDoc = reader.read(template.getInputStream());
Resource xslResource = template.createRelative(XSLSyntaxcheckfailed);
String systemId = xslResource.getURL().toExternalForm();
xsltSyntaxcheckFailed = new StreamSource(xslResource.getInputStream(), systemId);
} catch (Throwable e) {
log.error("cannot initialize this transformation.", e);
throw new IllegalStateException(e);
}
}
public void addJobConstant(String aKey, String aValue){
this.jobConstantMap.put(aKey, aValue);
}
/**
* creates a new Transformer object using a stylesheet based on the transformation rules
*/
public void configureTransformation()throws TransformerConfigurationException{
final List<TransformerException> errorList = new ArrayList<TransformerException>();
javax.xml.transform.ErrorListener listener = new javax.xml.transform.ErrorListener() {
@Override
public void warning(TransformerException exception) throws TransformerException {
// TODO Auto-generated method stub
}
@Override
public void fatalError(TransformerException exception) throws TransformerException {
// TODO Auto-generated method stub
errorList.add(exception);
throw exception;
}
@Override
public void error(TransformerException exception) throws TransformerException {
// TODO Auto-generated method stub
}
};
TransformerFactory factory = TransformerFactory.newInstance();
factory.setAttribute(FeatureKeys.ALLOW_EXTERNAL_FUNCTIONS, Boolean.TRUE);
factory.setErrorListener(listener);
Templates templates = null;
try{
if (this.ruleLanguageParser.isXslStylesheet()){
templates = factory.newTemplates(new StreamSource(new StringReader(ruleLanguageParser.getXslStylesheet())));
}else{
templates = factory.newTemplates(new StreamSource(createStylesheet()));
}
transformer = templates.newTransformer();
//((net.sf.saxon.Controller)transformer).setMessageEmitter(mw);
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
Templates templateFailed = factory.newTemplates(xsltSyntaxcheckFailed);
transformerFailed = templateFailed.newTransformer();
}catch(TransformerConfigurationException e){
if (!errorList.isEmpty()) {
System.out.println(errorList.get(0).getMessageAndLocation()); // todo it seems the location information is not yet correct
throw new TransformerConfigurationException(errorList.get(0).getMessageAndLocation());
}else{
throw e;
}
}
//((net.sf.saxon.Controller)transformerFailed).setMessageEmitter(mw);
}
/* (non-Javadoc)
* @see eu.dnetlib.data.collective.transformation.engine.core.ITransformation#transformRecord(java.lang.String, int)
*/
public String transformRecord(String record, int index)throws TerminationException, TransformationException{
try {
StreamSource s = new StreamSource(new StringReader(record));
StringWriter writer = new StringWriter();
StreamResult r = new StreamResult(writer);
transformer.setParameter("index", index);
transformer.transform(s , r);
return writer.toString();
}catch (TerminationException e) {
log.debug(e.getLocalizedMessage());
throw e;
} catch (TransformerException e) {
log.error(e);
throw new TransformationException(e);
}
}
public String transformRecord(String record, Map<String, String> parameters) throws TerminationException, TransformationException{
try {
StreamSource s = new StreamSource(new StringReader(record));
StringWriter writer = new StringWriter();
StreamResult r = new StreamResult(writer);
for (String key: parameters.keySet()){
transformer.setParameter(key, parameters.get(key));
}
transformer.transform(s , r);
return writer.toString();
}catch (TerminationException e){
log.debug(e.getLocalizedMessage());
throw e;
} catch (TransformerException e) {
log.error(e);
throw new TransformationException(e);
}
}
public String transformRecord(String record, String stylesheetName) throws TransformationException{
if (!stylesheetName.equals(XSLSyntaxcheckfailed))
throw new IllegalArgumentException("in TransformationImpl: stylesheetname " + stylesheetName + " is unsupported!" );
try{
StreamSource s = new StreamSource(new StringReader(record));
StringWriter w = new StringWriter();
StreamResult r = new StreamResult(w);
transformerFailed.transform(s, r);
return w.toString();
}catch (TransformerException e){
log.error(e);
throw new TransformationException(e);
}
}
public String dumpStylesheet(){
return xslDoc.asXML();
// StringWriter writer = new StringWriter();
// try {
// Transformer tXsl = transformer; //.newTransformer();
// tXsl.setOutputProperty(OutputKeys.INDENT, "yes");
// tXsl.setOutputProperty(OutputKeys.METHOD, "xml");
// tXsl.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
//
// StreamResult r = new StreamResult(writer);
// Source s = new StreamSource(new StringReader(xslDoc.asXML()));
// tXsl.transform(s, r);
// } catch (TransformerException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// return writer.toString();
}
/**
* sets the XSL template
* @param template - resource to access the XSL template
*/
public void setTemplate(Resource template) {
this.template = template;
}
/**
* @return the resource to access the XSL template
*/
public Resource getTemplate() {
return template;
}
public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) {
this.ruleLanguageParser = ruleLanguageParser;
}
public RuleLanguageParser getRuleLanguageParser() {
return ruleLanguageParser;
}
/**
* @param stylesheetBuilder the stylesheetBuilder to set
*/
public void setStylesheetBuilder(StylesheetBuilder stylesheetBuilder) {
this.stylesheetBuilder = stylesheetBuilder;
}
/**
* @return the stylesheetBuilder
*/
public StylesheetBuilder getStylesheetBuilder() {
return stylesheetBuilder;
}
/**
* @return the transformation rules as String object
*/
protected String getTransformationRules(){
// add job-properties to the rules as variables
for (String key: jobConstantMap.keySet()){
Rules r = new Rules();
r.setVariable(key);
r.setConstant("'" + jobConstantMap.get(key) + "'");
ruleLanguageParser.getVariableMappingRules().put(JOBCONST_DATASINKID, r);
}
if (this.stylesheetBuilder == null){
// create DMF compliant stylesheet builder
this.stylesheetBuilder = new StylesheetBuilder();
this.stylesheetBuilder.setRuleLanguageParser(this.ruleLanguageParser);
NamespaceContextImpl namespaceContext = new NamespaceContextImpl();
for (String prefix: ruleLanguageParser.getNamespaceDeclarations().keySet()){
namespaceContext.addNamespace(prefix, ruleLanguageParser.getNamespaceDeclarations().get(prefix));
}
SchemaInspector inspector = new SchemaInspector();
try {
inspector.inspect(this.schema.getURL(), rootElement);
} catch (Exception e) {
throw new IllegalStateException(e);
}
this.stylesheetBuilder.setNamespaceContext(namespaceContext);
this.stylesheetBuilder.setSchemaInspector(inspector);
}
return this.stylesheetBuilder.createTemplate();
}
/**
* creates a stylesheet from transformation rules;
* <p>don't call this method multiple times, unless transformation configuration changes, then re-init and configure transformation</p>
* @return the stylesheet
*/
private Reader createStylesheet(){
try {
Document rulesDoc = DocumentHelper.parseText(getTransformationRules());
for(String key: this.ruleLanguageParser.getNamespaceDeclarations().keySet()){
xslDoc.getRootElement().addNamespace(key, this.ruleLanguageParser.getNamespaceDeclarations().get(key));
}
@SuppressWarnings("unchecked")
List<Node> nodes = rulesDoc.getRootElement().selectNodes("//xsl:template");
@SuppressWarnings("unchecked")
List<Node> varNodes = rulesDoc.getRootElement().selectNodes("/templateroot/xsl:param");
for (Node node: varNodes){
xslDoc.getRootElement().add( ((Element)node).detach() );
}
// xslDoc.getRootElement().add(rulesDoc.getRootElement().selectSingleNode("//xsl:param[@name='var1']").detach());
for (Node node: nodes){
xslDoc.getRootElement().add( ((Element)node).detach() ); // (rulesDoc.getRootElement().aget);
}
} catch (DocumentException e) {
log.error("error in creating stylesheet: " + e);
throw new IllegalStateException(e);
}
return new StringReader(xslDoc.asXML());
}
/**
* @param schema the schema to set
*/
public void setSchema(Resource schema) {
this.schema = schema;
}
/**
* @return the schema
*/
public Resource getSchema() {
return schema;
}
@Override
public Map<String, String> getStaticTransformationResults() {
return this.staticResults;
}
@Override
public Map<String, String> getJobProperties() {
// TODO Auto-generated method stub
return this.jobConstantMap;
}
@Override
public Properties getLogInformation() {
// TODO Auto-generated method stub
return null;
}
}