package eu.dnetlib.data.collective.transformation.engine; import java.io.StringReader; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.dom4j.XPath; import org.dom4j.io.SAXReader; import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy; import eu.dnetlib.data.collective.transformation.engine.functions.Convert; import eu.dnetlib.data.collective.transformation.engine.functions.Extract; import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException; import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression; import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue; import eu.dnetlib.data.collective.transformation.rulelanguage.IRule; import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall; /** * @author jochen * */ public class PreProcessor { @SuppressWarnings("unused") private static final Log log = LogFactory.getLog(PreProcessor.class); private Convert convertFunction; private Extract extractFunction; private RetrieveValue retrieveFunction; private RegularExpression regExprFunction; private TransformationFunctionProxy functionProxy; private SAXReader reader = new SAXReader(); private Map nsMap = new HashMap(); /** * pre-process output values from object records using a function call * @param aFunctionCall function call object * @param aObjectRecords list of object records * @param aNamespaceMap map of namespace prefixes and uris */ public void preprocess( FunctionCall aFunctionCall, List aObjectRecords, Map aNamespaceMap, Map aStaticResults, Map aJobProperties, Map aVarRules){ this.nsMap = aNamespaceMap; FunctionResults functionResults = new FunctionResults(); try { if (aFunctionCall.getExternalFunctionName().equals("extract")){ String featureName = aFunctionCall.getParameters().get(Extract.paramNameFeature); functionResults.addAll(extractFunction.execute(aObjectRecords, featureName)); }else{ for (String objRecord: aObjectRecords){ String result = null; if (aFunctionCall.getExternalFunctionName().equals("convert")){ if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){ functionResults.add(aStaticResults.get(aFunctionCall.getUuid())); }else{ String vocabName = aFunctionCall.getParameters().get(Convert.paramVocabularyName); String fieldExpr = aFunctionCall.getParameters().get(Convert.paramFieldValue); List recordValues = getValuesFromRecord(objRecord, fieldExpr); if (aFunctionCall.isStatic()) aStaticResults.put(aFunctionCall.getUuid(), convertFunction.executeSingleValue(vocabName, recordValues)); else functionResults.add(convertFunction.executeAllValues(vocabName, recordValues)); } }else if (aFunctionCall.getExternalFunctionName().equals("getValue")){ if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())) functionResults.add(aStaticResults.get(aFunctionCall.getUuid())); else{ String functionName = aFunctionCall.getParameters().get(RetrieveValue.paramFunctionName); result = retrieveFunction.executeSingleValue(functionName, aFunctionCall.getArguments(), objRecord, nsMap); functionResults.add(result); if (aFunctionCall.isStatic()) aStaticResults.put(aFunctionCall.getUuid(), result); } }else if (aFunctionCall.getExternalFunctionName().equals("regExpr")){ // TODO if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){ //log.debug("static functioncal; static result exist to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr)); // functionResults.add(aStaticResults.get(aFunctionCall.getUuid())); }else{ // log.debug("static functioncal to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr)); String regularExpression = aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr); //.replaceAll("'", ""); String expression1 = aFunctionCall.getParameters().get(RegularExpression.paramExpr1); List recordValues = null; // distinguish xpath-expr, jobConst, var // log.debug("expression1: " + expression1); if (aJobProperties.containsKey(expression1)){ recordValues = new LinkedList(); recordValues.add(aJobProperties.get(expression1)); }else{ recordValues = getValuesFromRecord(objRecord, expression1); } String expression2 = aFunctionCall.getParameters().get(RegularExpression.paramExpr2); String replacement = ""; if (aJobProperties.containsKey(expression2)){ replacement = aJobProperties.get(expression2); }else if (aVarRules.containsKey(expression2)){ Rules varRule = (Rules)aVarRules.get(expression2); replacement = varRule.getConstant().replace("'", ""); // currently limited to constant rules. }else { replacement = getValuesFromRecord(objRecord, expression2).get(0); // get the first available value } List regExprResults = new LinkedList(); for (String fieldValue: recordValues){ try { int lastSlash = regularExpression.lastIndexOf("/"); String trailingOptions = regularExpression.substring(lastSlash); int replacementSlash = regularExpression.substring(0, lastSlash).lastIndexOf("/"); String replacementFromExpression = regularExpression.substring(replacementSlash + 1, lastSlash); String newRegExpr = regularExpression.substring(0, replacementSlash + 1) + replacement + replacementFromExpression + trailingOptions; // ??? result = regExprFunction.executeSingleValue(newRegExpr, fieldValue, replacement); regExprResults.add(result); } catch (ProcessingException e) { throw new IllegalStateException(e); } // regExprResults.add(regExprFunction.executeSingleValue(regularExpression, fieldValue, expression2)); } functionResults.add(regExprResults); // assuming 1 result only if (aFunctionCall.isStatic()){ aStaticResults.put(aFunctionCall.getUuid(), result); } // unsupported // if (aFunctionCall.isStatic()){ // aStaticResults.put(aFunctionCall.getUuid(), result); // } } } } } } catch (ProcessingException e) { throw new IllegalStateException(e); } catch (DocumentException e) { throw new IllegalStateException(e); } functionProxy.setResults(aFunctionCall.getUuid(), functionResults); } public void setFunctionProxy(TransformationFunctionProxy functionProxy) { this.functionProxy = functionProxy; } public TransformationFunctionProxy getFunctionProxy() { return functionProxy; } public void setConvertFunction(Convert convertFunction) { this.convertFunction = convertFunction; } public Convert getConvertFunction() { return convertFunction; } /** * @param retrieveFunction the retrieveFunction to set */ public void setRetrieveFunction(RetrieveValue retrieveFunction) { this.retrieveFunction = retrieveFunction; } /** * @return the retrieveFunction */ public RetrieveValue getRetrieveFunction() { return retrieveFunction; } /** * @return the regExprFunction */ public RegularExpression getRegExprFunction() { return regExprFunction; } /** * @param regExprFunction the regExprFunction to set */ public void setRegExprFunction(RegularExpression regExprFunction) { this.regExprFunction = regExprFunction; } /** * @param extractFunction the extractFunction to set */ public void setExtractFunction(Extract extractFunction) { this.extractFunction = extractFunction; } /** * @return the extractFunction */ public Extract getExtractFunction() { return extractFunction; } /** * evaluate given XPath Expr applied on a record and return the values as a list of strings * @param record * @param xpathExpr * @return list of strings * @throws DocumentException */ @SuppressWarnings("unchecked") private List getValuesFromRecord(String record, String xpathExpr) throws DocumentException{ List values = new LinkedList(); Document doc = reader.read(new StringReader(record)); XPath xpath = DocumentHelper.createXPath(xpathExpr); xpath.setNamespaceURIs(nsMap); Object context = xpath.evaluate(doc); if (context instanceof String) values.add((String)context); else if (context instanceof List) for (Node node: (List)context) values.add(node.getText()); else if (context instanceof Node) values.add( ((Node)context).getText()); else if (context instanceof Number) values.add( ((Number)context).intValue() + ""); return values; } }