parameters) throws TerminationException, TransformationException{
+ try {
+ StreamSource s = new StreamSource(new StringReader(record));
+ StringWriter writer = new StringWriter();
+ StreamResult r = new StreamResult(writer);
+ for (String key: parameters.keySet()){
+ transformer.setParameter(key, parameters.get(key));
+ }
+ transformer.transform(s , r);
+ return writer.toString();
+ }catch (TerminationException e){
+ log.debug(e.getLocalizedMessage());
+ throw e;
+ } catch (TransformerException e) {
+ log.error(e);
+ throw new TransformationException(e);
+ }
+ }
+
+    /** Transforms the record with the transformerFailed transformer; only the XSLSyntaxcheckfailed stylesheet name is accepted, anything else (including null) is rejected. */
+    public String transformRecord(String record, String stylesheetName) throws TransformationException{
+        if (stylesheetName == null || !stylesheetName.equals(XSLSyntaxcheckfailed)) {
+            throw new IllegalArgumentException("in TransformationImpl: stylesheetname " + stylesheetName + " is unsupported!" );
+        }
+        try{
+            StringWriter w = new StringWriter();
+            transformerFailed.transform(new StreamSource(new StringReader(record)), new StreamResult(w));
+            return w.toString();
+        }catch (TransformerException e){
+            log.error("transformation with stylesheet " + stylesheetName + " failed", e); // pass the throwable so the stack trace is logged
+            throw new TransformationException(e);
+        }
+    }
+
+    public String dumpStylesheet(){ // returns the current stylesheet document (xslDoc) serialized as XML
+        return xslDoc.asXML();
+        // Disabled alternative kept below: re-serialize the stylesheet through the transformer with indentation enabled.
+// StringWriter writer = new StringWriter();
+// try {
+// Transformer tXsl = transformer; //.newTransformer();
+// tXsl.setOutputProperty(OutputKeys.INDENT, "yes");
+// tXsl.setOutputProperty(OutputKeys.METHOD, "xml");
+// tXsl.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
+//
+// StreamResult r = new StreamResult(writer);
+// Source s = new StreamSource(new StringReader(xslDoc.asXML()));
+// tXsl.transform(s, r);
+// } catch (TransformerException e) {
+// // TODO Auto-generated catch block
+// e.printStackTrace();
+// }
+// return writer.toString();
+    }
+
+
+    /**
+     * Sets the XSL template.
+     * @param template resource used to access the XSL template
+     */
+    public void setTemplate(Resource template) {
+        this.template = template;
+    }
+
+    /**
+     * @return the resource used to access the XSL template, as last set through setTemplate
+     */
+    public Resource getTemplate() {
+        return template;
+    }
+    /** Sets the parser whose variable mapping rules and namespace declarations are used when the stylesheet is built. */
+    public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) {
+        this.ruleLanguageParser = ruleLanguageParser;
+    }
+    /** Returns the rule language parser currently configured for this transformation. */
+    public RuleLanguageParser getRuleLanguageParser() {
+        return ruleLanguageParser;
+    }
+
+    /**
+     * @param stylesheetBuilder the stylesheetBuilder to set; when none is set, getTransformationRules() creates one
+     */
+    public void setStylesheetBuilder(StylesheetBuilder stylesheetBuilder) {
+        this.stylesheetBuilder = stylesheetBuilder;
+    }
+
+    /**
+     * @return the stylesheetBuilder, which is null until one has been set or created
+     */
+    public StylesheetBuilder getStylesheetBuilder() {
+        return stylesheetBuilder;
+    }
+
+    /**
+     * Registers the job properties as quoted constant rules and renders the rule template; a default stylesheet builder is stored only after it has been fully configured.
+     * @return the transformation rules as String object
+     */
+    protected String getTransformationRules(){
+        for (String key: jobConstantMap.keySet()){
+            Rules r = new Rules();
+            r.setVariable(key);
+            r.setConstant("'" + jobConstantMap.get(key) + "'");
+            ruleLanguageParser.getVariableMappingRules().put(JOBCONST_DATASINKID, r); // NOTE(review): same map key for every job constant, so only the last rule survives - confirm intent
+        }
+        if (this.stylesheetBuilder == null){
+            StylesheetBuilder builder = new StylesheetBuilder(); // create DMF compliant stylesheet builder
+            builder.setRuleLanguageParser(this.ruleLanguageParser);
+            NamespaceContextImpl namespaceContext = new NamespaceContextImpl();
+            for (String prefix: ruleLanguageParser.getNamespaceDeclarations().keySet()){
+                namespaceContext.addNamespace(prefix, ruleLanguageParser.getNamespaceDeclarations().get(prefix));
+            }
+            SchemaInspector inspector = new SchemaInspector();
+            try {
+                inspector.inspect(this.schema.getURL(), rootElement);
+            } catch (Exception e) {
+                throw new IllegalStateException(e);
+            }
+            builder.setNamespaceContext(namespaceContext);
+            builder.setSchemaInspector(inspector);
+            this.stylesheetBuilder = builder; // stored last: a failed inspect() must not leave a half-configured builder behind
+        }
+        return this.stylesheetBuilder.createTemplate();
+    }
+
+    /**
+     * creates a stylesheet from transformation rules: namespace declarations, the xsl:param nodes below /templateroot and all xsl:template nodes are copied into xslDoc;
+     * don't call this method multiple times, unless transformation configuration changes, then re-init and configure transformation
+     * @return the stylesheet
+     */
+    private Reader createStylesheet(){
+        try {
+            Element rulesRoot = DocumentHelper.parseText(getTransformationRules()).getRootElement();
+            Element xslRoot = xslDoc.getRootElement();
+            for (String prefix : this.ruleLanguageParser.getNamespaceDeclarations().keySet()) {
+                xslRoot.addNamespace(prefix, this.ruleLanguageParser.getNamespaceDeclarations().get(prefix));
+            }
+            // both selections are made before any node is detached from the rules document
+            @SuppressWarnings("unchecked")
+            List templateNodes = rulesRoot.selectNodes("//xsl:template");
+            @SuppressWarnings("unchecked")
+            List paramNodes = rulesRoot.selectNodes("/templateroot/xsl:param");
+            for (Node paramNode : paramNodes) {
+                xslRoot.add( ((Element) paramNode).detach() );
+            }
+            for (Node templateNode : templateNodes) {
+                xslRoot.add( ((Element) templateNode).detach() );
+            }
+        } catch (DocumentException e) {
+            log.error("error in creating stylesheet: " + e);
+            throw new IllegalStateException(e);
+        }
+        String stylesheet = xslDoc.asXML();
+        return new StringReader(stylesheet);
+    }
+
+    /**
+     * @param schema the schema to set; its URL is handed to the schema inspector when a default stylesheet builder is created
+     */
+    public void setSchema(Resource schema) {
+        this.schema = schema;
+    }
+
+    /**
+     * @return the schema resource as last set through setSchema
+     */
+    public Resource getSchema() {
+        return schema;
+    }
+    /** Returns the static transformation results held in the staticResults field (the field itself, not a copy). */
+    @Override
+    public Map getStaticTransformationResults() {
+        return this.staticResults;
+    }
+
+    @Override
+    public Map getJobProperties() {
+        // hands out the job constant map that getTransformationRules() turns into constant rules
+        return this.jobConstantMap;
+    }
+
+    @Override
+    public Properties getLogInformation() {
+        // TODO not implemented: no log information is collected here, callers receive null
+        return null;
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java
new file mode 100644
index 0000000..933307d
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java
@@ -0,0 +1,12 @@
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.List;
+/** Base class of the transformation functions in this package. */
+public abstract class AbstractTransformationFunction implements
+        ITransformationFunction {
+    // record lists with package-private access; this class itself never reads or writes them
+    List objectRecords;
+    List resultRecords;
+    // generic entry point every concrete function has to provide
+    abstract String execute() throws ProcessingException;
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java
new file mode 100644
index 0000000..65cd79c
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java
@@ -0,0 +1,74 @@
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import javax.annotation.Resource;
+
+import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
+
+/**
+ * @author jochen
+ *
+ */
+public class Convert extends AbstractTransformationFunction {
+
+    public static final String paramVocabularyName = "vocabularyName";
+    public static final String paramFieldValue = "fieldValue";
+    public static final String paramDefaultPattern = "defaultPattern";
+    public static final String paramFunction = "function";
+
+    @Resource
+    private VocabularyRegistry vocabularyRegistry;
+
+    /** Not implemented; always returns {@code null}. */
+    @Override
+    public String execute() throws ProcessingException {
+        return null;
+    }
+
+    /**
+     * extracts and returns the encoded value as used in the vocabulary
+     * @param vocabularyName the name of the vocabulary to be used
+     * @param fieldValues the list of values to normalize
+     * @return encoded value
+     * @throws ProcessingException if the vocabulary is unknown or the encoding fails
+     */
+    public String executeSingleValue(String vocabularyName, List fieldValues)throws ProcessingException{
+        requireKnownVocabulary(vocabularyName);
+        return vocabularyRegistry.getVocabulary(vocabularyName).encoding(fieldValues);
+    }
+
+    /** Encodes like {@link #executeSingleValue} and returns that one value once per input value, but at least once. */
+    public List executeAllValues(String vocabularyName, List fieldValues) throws ProcessingException{
+        requireKnownVocabulary(vocabularyName);
+        int numOfComputedValues = Math.max(fieldValues.size(), 1); // return at least 1 value
+        String returnValue = vocabularyRegistry.getVocabulary(vocabularyName).encoding(fieldValues);
+        List computedValues = new LinkedList();
+        for (int i = 0; i < numOfComputedValues; i++){
+            computedValues.add(returnValue);
+        }
+        return computedValues;
+    }
+
+    /** Encodes the values with a default pattern and a filter function; an unknown vocabulary is rejected with a ProcessingException, as in the other entry points. */
+    public List executeFilterByParams(String vocabName, List fieldValues, String defaultPattern, String filterFunction) throws ProcessingException{
+        requireKnownVocabulary(vocabName);
+        return vocabularyRegistry.getVocabulary(vocabName).encoding(fieldValues, defaultPattern, filterFunction);
+    }
+
+    /** Throws a ProcessingException unless the registry contains a vocabulary of the given name. */
+    private void requireKnownVocabulary(String vocabularyName) throws ProcessingException {
+        if (!vocabularyRegistry.getVocabularies().containsKey(vocabularyName)){
+            throw new ProcessingException("unknown vocabulary: " + vocabularyName);
+        }
+    }
+
+    public VocabularyRegistry getVocabularyRegistry() {
+        return vocabularyRegistry;
+    }
+
+    public void setVocabularyRegistry(VocabularyRegistry vocabularyRegistry) {
+        this.vocabularyRegistry = vocabularyRegistry;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java
new file mode 100644
index 0000000..3a433cf
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java
@@ -0,0 +1,108 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+//import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+//import java.util.Map;
+
+
+
+
+import org.apache.oro.text.perl.Perl5Util;
+
+
+/**
+ * @author jochen
+ *
+ */
+public class DateVocabulary extends Vocabulary{
+    private static final String filterFuncMin = "min()";
+    private String pattern_1 = "/^(\\d{4,4}-\\d{1,2}-\\d{1,2})/";
+    private String pattern_2 = "/^(\\d{4,4}-\\d{1,2})$/";
+    private String pattern_3 = "/^(\\d{4,4})$/";
+    private String pattern_4 = "/^(\\d{1,2}.\\d{1,2}.\\d{4,4})$/";
+
+    private transient Perl5Util perl5 = new Perl5Util();
+
+    /**
+     * Returns the first key that starts with a full date, normalized to yyyy-MM-dd. If there is none, the last
+     * year-month key (day set to 01) is used, then the last year-only key (-01-01), then the last day.month.year key.
+     * @param aKeys candidate date strings
+     * @return the normalized date, or an empty string when no key matches
+     * @throws ProcessingException if a key cannot be processed, e.g. a null key
+     */
+    @Override
+    public String encoding(List aKeys) throws ProcessingException{
+        String tempKey_1 = null;
+        String tempKey_2 = null;
+        String tempKey_3 = null;
+        String currentKey = null;
+        String twoDigitFormat = "%02d";
+        try{
+            for (String key: aKeys){
+                key = key.trim();
+                currentKey = key;
+                if (perl5.match(pattern_1, key)){
+                    String[] dateSplitted = perl5.getMatch().toString().split("-");
+                    return dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[2]));
+                }else if (perl5.match(pattern_2, key)){
+                    String[] dateSplitted = key.split("-");
+                    tempKey_1 = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-01";
+                }else if (perl5.match(pattern_3, key)){
+                    tempKey_2 = key + "-01-01";
+                }else if (perl5.match(pattern_4, key)){
+                    String[] components = key.split("[\\-\\/\\.]");
+                    // ignore this key if it has less than 3 components
+                    if (components.length >= 3)
+                        tempKey_3 = components[2] + "-" + String.format(twoDigitFormat, Integer.parseInt(components[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(components[0]));
+                }
+            }
+        }catch(RuntimeException e){ // deliberately not Throwable: JVM errors are not data problems and must propagate
+            throw new ProcessingException("Exception thrown in Datevocabulary (tried to match for value '" + currentKey + "'):", e);
+        }
+        if (tempKey_1 != null) return tempKey_1;
+        if (tempKey_2 != null) return tempKey_2;
+        return tempKey_3 != null ? tempKey_3 : "";
+    }
+
+    /**
+     * Encodes every key on its own and formats the hits with aDefaultPattern. With a non-empty filter
+     * function the hits are reduced to one value; only "min()" (the earlier date) is supported.
+     * @throws ProcessingException on an unparsable date or an unsupported filter function
+     */
+    @Override
+    public List encoding(List aKeys, String aDefaultPattern, String aFilterFunction) throws ProcessingException {
+        List evList = new LinkedList();
+        // one formatter per call: SimpleDateFormat is not synchronized, so it is no longer kept in a field
+        SimpleDateFormat df = new SimpleDateFormat(aDefaultPattern);
+        boolean hasFilter = aFilterFunction != null && aFilterFunction.trim().length() > 0;
+        for (String v: aKeys){
+            String ev = encoding(Arrays.asList(new String[]{v}));
+            if (ev.length() > 0){
+                try {
+                    if (hasFilter && !evList.isEmpty())
+                        evList.add( filter(df, df.parse(ev), df.parse(evList.remove(0)), aFilterFunction) );
+                    else
+                        evList.add(df.format(df.parse(ev)));
+                } catch (ParseException e) {
+                    throw new ProcessingException("invalid date format: " + ev, e); // keep the cause
+                }
+            }
+        }
+        return evList;
+    }
+
+    /** Formats the earlier of the two dates; every filter other than "min()" is rejected. */
+    private String filter(SimpleDateFormat df, Date d1, Date d2, String filter) throws ProcessingException{
+        if (!filter.equals(filterFuncMin))
+            throw new ProcessingException("unsupported filter function: " + filter);
+        return df.format(d1.before(d2) ? d1 : d2);
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java
new file mode 100644
index 0000000..ea39d08
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java
@@ -0,0 +1,72 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.io.StringReader;
+
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+
+import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
+import eu.dnetlib.data.collective.transformation.TransformationException;
+
+/**
+ * @author jochen
+ *
+ */
+public class Dblookup extends AbstractTransformationFunction {
+
+    public static final String paramSqlExpr = "sqlExpr";
+    private IDatabaseConnector dbConnector;
+
+    /** Creates the function; the database connector is supplied through its setter. */
+    public Dblookup() {
+    }
+
+    /**
+     * Not used by this function; always returns {@code null}.
+     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
+     */
+    @Override
+    String execute() throws ProcessingException {
+        return null;
+    }
+
+    /**
+     * @return the dbConnector
+     */
+    public IDatabaseConnector getDbConnector() {
+        return this.dbConnector;
+    }
+
+    /**
+     * @param dbConnector the dbConnector to set
+     */
+    public void setDbConnector(IDatabaseConnector dbConnector) {
+        this.dbConnector = dbConnector;
+    }
+
+    /**
+     * Runs the SQL expression through the database connector and stores, for every result record,
+     * its 'officialname' and 'id' field values under the record's 'accessinfopackage' value.
+     * @param aSqlExpression the expression handed to the database connector
+     * @return the collected lookup record
+     */
+    public LookupRecord getResults(String aSqlExpression) throws TransformationException, XPathExpressionException {
+        XPath xpathEngine = XPathFactory.newInstance().newXPath();
+        LookupRecord collected = new LookupRecord();
+        for (String row : dbConnector.getResult(aSqlExpression)) {
+            Node rowRoot = (Node) xpathEngine.evaluate("/", new InputSource(new StringReader(row)), XPathConstants.NODE);
+            String recordKey = xpathEngine.evaluate("//FIELD[@name='accessinfopackage']/text()", rowRoot);
+            collected.setRecord(recordKey, "officialname", xpathEngine.evaluate("//FIELD[@name='officialname']/text()", rowRoot));
+            collected.setRecord(recordKey, "id", xpathEngine.evaluate("//FIELD[@name='id']/text()", rowRoot));
+        }
+        return collected;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java
new file mode 100644
index 0000000..69a8fec
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java
@@ -0,0 +1,50 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.List;
+
+import eu.dnetlib.data.collective.transformation.TransformationException;
+
+/**
+ * @author jochen
+ *
+ */
+public class Extract extends AbstractTransformationFunction {
+    public static final String paramNameFeature = "feature";
+    private IFeatureExtraction featureExtraction;
+
+    /** Not used by this function; always returns {@code null}. */
+    @Override
+    String execute() throws ProcessingException {
+        return null;
+    }
+
+    /**
+     * Delegates the extraction of a feature to the configured feature extraction.
+     * @param aObjectRecords the records handed to the feature extraction
+     * @param aFeature the name of the feature
+     * @return the list returned by the feature extraction
+     * @throws ProcessingException wrapping a TransformationException raised by the delegate
+     */
+    public List execute(List aObjectRecords, String aFeature) throws ProcessingException{
+        final List extracted;
+        try {
+            extracted = featureExtraction.execute(aObjectRecords, aFeature);
+        } catch (TransformationException cause) {
+            throw new ProcessingException(cause);
+        }
+        return extracted;
+    }
+
+    /** @param featureExtraction the featureExtraction to set */
+    public void setFeatureExtraction(IFeatureExtraction featureExtraction) {
+        this.featureExtraction = featureExtraction;
+    }
+
+    /** @return the featureExtraction */
+    public IFeatureExtraction getFeatureExtraction() {
+        return this.featureExtraction;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java
new file mode 100644
index 0000000..127d54f
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java
@@ -0,0 +1,25 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.List;
+
+import eu.dnetlib.data.collective.transformation.TransformationException;
+
+
+/**
+ * @author jochen
+ *
+ */
+public interface IFeatureExtraction {
+
+    /**
+     * applies the extraction of a feature on objectRecords
+     * @param aObjectRecords the records the feature is extracted from
+     * @param aFeatureName the name of the feature to extract
+     * @return list of extracted results
+     * @throws TransformationException declared failure type of an extraction
+     */
+    public List execute(List aObjectRecords, String aFeatureName) throws TransformationException;
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ITransformationFunction.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ITransformationFunction.java
new file mode 100644
index 0000000..add8e83
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ITransformationFunction.java
@@ -0,0 +1,5 @@
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+public interface ITransformationFunction {
+    // marker interface: it declares no methods
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IVocabulary.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IVocabulary.java
new file mode 100644
index 0000000..19cd01e
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IVocabulary.java
@@ -0,0 +1,31 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.List;
+
+/**
+ * @author jochen
+ *
+ */
+public interface IVocabulary {
+
+    /**
+     * return the encoding for a given list of values
+     * @param keys the values to encode
+     * @return the encoding as string
+     * @throws ProcessingException declared failure type of an encoding
+     */
+    public String encoding(List keys) throws ProcessingException;
+
+    /**
+     * return the encoding for a given list of values using a default pattern and applying a filter function
+     * @param aKeys the values to encode
+     * @param aDefaultPattern the default pattern to use
+     * @param aFilterFunction the filter function to apply
+     * @return the list of encoded values
+     * @throws ProcessingException declared failure type of an encoding
+     */
+    public List encoding(List aKeys, String aDefaultPattern, String aFilterFunction) throws ProcessingException;
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java
new file mode 100644
index 0000000..7891e4b
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java
@@ -0,0 +1,114 @@
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+public class IdentifierExtract extends AbstractTransformationFunction{
+    public static final Log log = LogFactory.getLog(IdentifierExtract.class);
+    public static final String paramXpathExprJson = "xpathExprJson";
+    public static final String paramXpathExprInSource = "xpathExprInputSource";
+    public static final String paramRegExpr = "regExpr";
+
+    /** Not used by this function; always returns {@code null}. */
+    @Override
+    String execute() throws ProcessingException {
+        return null;
+    }
+
+    /**
+     * extract content matched by a regular expression pattern from a given node and return matched content as a node-list
+     * @param aXpathExprList xpath expressions selecting the nodes whose text content is searched
+     * @param aInput the node the xpath expressions are evaluated against
+     * @param aRegExpression regular expression; every distinct match becomes one value
+     * @param aDocument document used to create the result nodes
+     * @param aXpath xpath engine used for the evaluation
+     * @return nodeList with a single 'root' element that holds one 'value' element per distinct match
+     * @throws ProcessingException if the regular expression is malformed or an xpath expression cannot be evaluated
+     */
+    public NodeList extract(List aXpathExprList, Node aInput,
+            String aRegExpression, Document aDocument, XPath aXpath) throws ProcessingException {
+
+        log.debug("xpathExprList: " + aXpathExprList);
+        log.debug("regExpr: " + aRegExpression);
+        Pattern p;
+        try {
+            p = Pattern.compile(aRegExpression);
+        } catch (IllegalArgumentException e) {
+            // PatternSyntaxException is an unchecked IllegalArgumentException; report it through the declared exception
+            throw new ProcessingException("invalid regular expression: " + aRegExpression, e);
+        }
+        Set identifierSet = new HashSet();
+        try {
+            List textList = extractText(aXpathExprList, aInput, aXpath);
+            for (String text: textList){
+                log.debug("text as input : " + text);
+                Matcher m = p.matcher(text);
+                while (m.find()){
+                    log.debug("extracted identifier: " + m.group());
+                    identifierSet.add(m.group());
+                }
+            }
+            return toNodeList(identifierSet, aDocument);
+        } catch (XPathExpressionException e) {
+            throw new ProcessingException(e); // the cause travels with the exception, so nothing is printed to stderr
+        } catch (ParserConfigurationException e) {
+            throw new ProcessingException(e);
+        }
+    }
+
+    /**
+     * create a list of nodes from a set of string values
+     * @param aValueSet set of unique values
+     * @param aDocument document used to create the nodes
+     * @return the child nodes of a document fragment: one 'root' element wrapping a 'value' element per entry
+     */
+    private NodeList toNodeList(Set aValueSet, Document aDocument){
+        DocumentFragment dFrag = aDocument.createDocumentFragment();
+        Element root = aDocument.createElement("root");
+        dFrag.appendChild(root);
+        for (String value: aValueSet){
+            Element eVal = aDocument.createElement("value");
+            eVal.setTextContent(value);
+            root.appendChild(eVal);
+        }
+        return dFrag.getChildNodes();
+    }
+
+    /**
+     * extract text from a given node using a list of given xpath expressions
+     * @param aXpathExprList xpath expressions to evaluate
+     * @param aInput node the expressions are evaluated against
+     * @param aXpath xpath engine used for the evaluation
+     * @return list of strings: the text content of every selected node
+     * @throws XPathExpressionException if an expression cannot be evaluated
+     * @throws ParserConfigurationException declared only; no statement in this method raises it
+     */
+    private List extractText(List aXpathExprList, Node aInput, XPath aXpath) throws XPathExpressionException, ParserConfigurationException{
+        List resultList = new LinkedList();
+        for (String xpathExpr: aXpathExprList){
+            NodeList nodeList = (NodeList)aXpath.evaluate(xpathExpr, aInput, XPathConstants.NODESET);
+            log.debug("extract text: nodelist length: " + nodeList.getLength());
+            for (int i = 0; i < nodeList.getLength(); i++){
+                resultList.add(nodeList.item(i).getTextContent());
+            }
+        }
+        return resultList;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java
new file mode 100644
index 0000000..c9b02d7
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java
@@ -0,0 +1,34 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * @author jochen
+ *
+ */
+public class Lookup extends AbstractTransformationFunction {
+    public static final Log log = LogFactory.getLog(Lookup.class);
+    public static final String paramExprIdentifier = "exprIdentifier";
+    public static final String paramExprProperty = "exprProperty";
+
+    /**
+     * Creates the function; there is no state to initialize.
+     */
+    public Lookup() {
+        // intentionally empty
+    }
+
+    /* (non-Javadoc)
+     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
+     */
+    @Override
+    String execute() throws ProcessingException {
+        // not implemented in this class: always returns null
+        return null;
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java
new file mode 100644
index 0000000..0c7f00a
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java
@@ -0,0 +1,33 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Properties;
+
+/**
+ * @author jochen
+ *
+ */
+public class LookupRecord {
+    private HashMap<String, Properties> recordMap = new LinkedHashMap<String, Properties>(); // typed, so the lookups below need no casts
+
+    /** Stores a property value for a record; the record's property set is created on first use. */
+    public void setRecord(String aRecordKey, String aPropertyKey, String aPropertyValue){
+        if (recordMap.containsKey(aRecordKey)){
+            recordMap.get(aRecordKey).setProperty(aPropertyKey, aPropertyValue);
+        }else{
+            Properties p = new Properties();
+            p.setProperty(aPropertyKey, aPropertyValue);
+            recordMap.put(aRecordKey, p);
+        }
+    }
+
+    /** Returns the stored value, or "UNKNOWN" when the record or the property is missing. */
+    public String getPropertyValue(String aRecordKey, String aPropertyKey){
+        Properties p = recordMap.get(aRecordKey);
+        return p == null ? "UNKNOWN" : p.getProperty(aPropertyKey, "UNKNOWN");
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/PersonVocabulary.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/PersonVocabulary.java
new file mode 100644
index 0000000..a564972
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/PersonVocabulary.java
@@ -0,0 +1,26 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.List;
+
+import prototype.Person;
+
+/**
+ * @author jochen
+ *
+ */
+public class PersonVocabulary extends Vocabulary{
+    /** Normalises every key as a person name and returns the full name of the last key, or "" for an empty list. */
+    @Override
+    public String encoding(List keys)throws ProcessingException{
+        String normalisedName = "";
+        for (String rawName : keys) {
+            // every key is parsed, but only the most recent result is kept
+            Person person = new Person(rawName);
+            normalisedName = person.getNormalisedFullname();
+        }
+        return normalisedName;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ProcessingException.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ProcessingException.java
new file mode 100644
index 0000000..47121cd
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ProcessingException.java
@@ -0,0 +1,46 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+/**
+ * @author jochen
+ *
+ */
+public class ProcessingException extends Exception {
+
+    /** Serialization version of this exception type. */
+    private static final long serialVersionUID = -8648116731979859467L;
+
+    /**
+     * Creates an exception without detail message or cause.
+     */
+    public ProcessingException() {
+    }
+
+    /**
+     * Creates an exception with a detail message.
+     * @param message description of the failure
+     */
+    public ProcessingException(String message) {
+        super(message);
+    }
+
+    /**
+     * Creates an exception that wraps the given cause.
+     * @param cause the underlying failure
+     */
+    public ProcessingException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * Creates an exception with a detail message and a cause.
+     * @param message description of the failure
+     * @param cause the underlying failure
+     */
+    public ProcessingException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpression.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpression.java
new file mode 100644
index 0000000..8cb45da
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpression.java
@@ -0,0 +1,60 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.oro.text.perl.MalformedPerl5PatternException;
+import org.apache.oro.text.perl.Perl5Util;
+
+/**
+ * @author jochen
+ *
+ */
+public class RegularExpression extends AbstractTransformationFunction {
+
+    public static final Log log = LogFactory.getLog(RegularExpression.class);
+    public static final String paramRegularExpr = "regularExpression";
+    public static final String paramExpr1 = "expr1";
+    public static final String paramExpr2 = "expr2";
+
+    private Perl5Util util = new Perl5Util();
+
+    /** Not used by this function; always returns {@code null}. */
+    @Override
+    String execute() throws ProcessingException {
+        return null;
+    }
+
+    /**
+     * Applies a Perl5 expression to aExpr1: "s/..." returns the substitution result, "m/..." returns capture
+     * group 1, and any other expression is matched and yields lower-cased group 1 + "_" + group 3.
+     * @param aExpr2 not used
+     * @return the computed value; "" when a match expression does not match
+     * @throws ProcessingException if the expression is malformed, whichever branch evaluates it
+     */
+    public String executeSingleValue(String aRegularExpression, String aExpr1, String aExpr2) throws ProcessingException{
+        try{
+            if (aRegularExpression.startsWith("s/")){
+                return util.substitute(aRegularExpression, aExpr1);
+            }
+            if (!util.match(aRegularExpression, aExpr1)){
+                return "";
+            }
+            if (aRegularExpression.startsWith("m/")){
+                return util.group(1);
+            }
+            // neither m/ nor s/: assume match and extract
+            String funder = util.group(1).toLowerCase();
+            String projectId = util.group(3);
+            return funder + "_" + projectId;
+        }catch(MalformedPerl5PatternException patternExc){
+            log.fatal("aRegularExpression: " + aRegularExpression);
+            log.fatal("aExpr1: " + aExpr1);
+            log.fatal(patternExc.getMessage());
+            throw new ProcessingException(patternExc);
+        }
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java
new file mode 100644
index 0000000..a22c5ed
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java
@@ -0,0 +1,157 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.io.StringReader;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import javax.xml.namespace.NamespaceContext;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.xml.sax.InputSource;
+
+import eu.dnetlib.common.profile.Resource;
+import eu.dnetlib.common.profile.ResourceDao;
+import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
+
+/**
+ * @author jochen
+ *
+ */
+public class RetrieveValue extends AbstractTransformationFunction {
+
+ public static final Log log = LogFactory.getLog(RetrieveValue.class);
+ public static final String paramFunctionName = "functionName";
+ public static final String paramFunctionProfileId = "functionParameterProfileId";
+ public static final String paramFunctionExpr = "functionParameterExpr";
+
+ public enum FUNCTION {PROFILEFIELD, CURRENTDATE};
+
+ @javax.annotation.Resource
+ private ResourceDao resourceDao;
+
+ /* (non-Javadoc)
+ * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
+ */
+ @Override
+ String execute() throws ProcessingException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ public String executeSingleValue(String functionName, List arguments, String objRecord, Map namespaceMap) throws ProcessingException{
+ String result = "";
+ FUNCTION function = FUNCTION.valueOf(functionName);
+
+ switch(function){
+ case PROFILEFIELD:
+ if (arguments.size() != 2){
+ throw new ProcessingException("invalid number of arguments - required 2 but found :" + arguments.size());
+ }
+ String arg = "";
+ Resource resource = null;
+ try{
+ if (arguments.get(0).isValue()){
+ arg = arguments.get(0).getArgument();
+ log.debug("retrieve value arg isValue: " + arg);
+ if (arg.startsWith("collection(")) { // xquery
+ arg = StringEscapeUtils.unescapeXml(arg);
+ resource = resourceDao.getResourceByQuery(arg); // query
+ }else
+ resource = resourceDao.getResource(arg); // profile id
+ }else if (arguments.get(0).isInputField()){
+ arg = evaluateXpath(objRecord, arguments.get(0).getArgument(), namespaceMap);
+ log.debug("retrieve value arg isInputField: " + arg);
+ if (arg.startsWith("collection(")) { // xquery
+ arg = StringEscapeUtils.unescapeXml(arg);
+ resource = resourceDao.getResourceByQuery(arg); // query
+ }else
+ resource = resourceDao.getResource(arg); // profile id
+ }else if (arguments.get(0).isJobConst()){
+ // TODO
+ }else if (arguments.get(0).isVariable()){
+ // TODO
+ log.warn("RETRIEVEVALUE: support for variables not yet implemented.");
+ }
+ }catch(Exception e){
+ throw new ProcessingException(e);
+ }
+
+ if (resource == null){
+ throw new ProcessingException("invalid profileId: " + arg + "; functionName: " + functionName + ", arg1: " + arguments.get(0).getArgument() + ", arg2: " + arguments.get(1).getArgument());
+ }
+ result = resource.getValue(arguments.get(1).getArgument()); // xpath expr
+ break;
+ case CURRENTDATE:
+ SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // TODO format string
+ result = dateFormat.format(new Date());
+ default:
+ // unsupported
+ break;
+ }
+ return result;
+ }
+
+ /**
+ * @return the resourceDao
+ */
+ public ResourceDao getResourceDao() {
+ return resourceDao;
+ }
+
+ /**
+ * @param resourceDao the resourceDao to set
+ */
+ public void setResourceDao(ResourceDao resourceDao) {
+ this.resourceDao = resourceDao;
+ }
+
+ private String evaluateXpath(String record, String xpathExpr, Map nsMap){
+ XPath xpath = XPathFactory.newInstance().newXPath();
+ xpath.setNamespaceContext(new NamespaceContext() {
+
+ @Override
+ public Iterator getPrefixes(String namespaceURI) {
+ return null;
+ }
+
+ @Override
+ public String getPrefix(String namespaceURI) {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public String getNamespaceURI(String prefix) {
+ if ("dri".equals(prefix)){
+ return "http://www.driver-repository.eu/namespace/dri";
+ }else if ("dr".equals(prefix)){
+ return "http://www.driver-repository.eu/namespace/dr";
+ }else if ("dc".equals(prefix)){
+ return "http://purl.org/dc/elements/1.1/";
+ }else if ("oaf".equals(prefix)){
+ return "http://namespace.openaire.eu/oaf";
+ }else if ("prov".equals(prefix)){
+ return "http://www.openarchives.org/OAI/2.0/provenance";
+ }
+ return "";
+ }
+ });
+ try {
+ return xpath.evaluate(xpathExpr, new InputSource(new StringReader(record)));
+ } catch (XPathExpressionException e) {
+ log.fatal("cannot evaluate xpath");
+ throw new IllegalStateException(e);
+ }
+ }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java
new file mode 100644
index 0000000..7f79aa4
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java
@@ -0,0 +1,86 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.springframework.util.StringUtils;
+
+/**
+ * @author js
+ *
+ */
+public class Split extends AbstractTransformationFunction {
+
+    public static final Log log = LogFactory.getLog(Split.class);
+    public static final String paramInputExpr = "inputExpr";
+    public static final String paramRegExpr = "regExpr";
+    public static final String paramElementName = "elementName";
+
+    private Map<String, Queue<String>> queueMap = new HashMap<String, Queue<String>>(); // tokens not yet handed out, keyed by call id
+
+    /** Not implemented for this function: always returns {@code null}. */
+    @Override
+    String execute() throws ProcessingException {
+        return null;
+    }
+
+    /**
+     * Splits every input value and returns all resulting tokens in input order.
+     * @param aInputValues the values to split
+     * @param aRegExpr despite the name, not a regular expression: it is handed to {@code StringUtils.tokenizeToStringArray} as a set of delimiter characters
+     * @return a new collection holding the trimmed, non-empty tokens of all values
+     */
+    public Collection<String> executeAllValues(List<String> aInputValues, String aRegExpr) throws ProcessingException{
+        return tokenize(aInputValues, aRegExpr);
+    }
+
+    /**
+     * Hands out the tokens of the input values one per call: when no queue is registered for the call id, the values are tokenized into a new queue first; the head of the queue is then returned.
+     * @param aInputValues the values to split; only read when no queue is registered for the call id
+     * @param aRegExpr the delimiter characters (see {@link #executeAllValues(List, String)})
+     * @param aCallId identifies the queue shared by a sequence of calls
+     * @return the next token, or {@code null} once the queue is exhausted (the queue is then unregistered)
+     */
+    public String executeSingleValue(List<String> aInputValues, String aRegExpr, String aCallId) throws ProcessingException{
+        if (!queueMap.containsKey(aCallId)){
+            queueMap.put(aCallId, tokenize(aInputValues, aRegExpr));
+        }
+        return executeSingleValue(aCallId);
+    }
+
+    /**
+     * Returns the next token of the queue registered for the given call id.
+     * @param aCallId identifies a queue set up by {@link #executeSingleValue(List, String, String)}
+     * @return the next token, or {@code null} when the queue is exhausted or no queue is registered for the id
+     */
+    public String executeSingleValue(String aCallId) throws ProcessingException{
+        Queue<String> queue = queueMap.get(aCallId);
+        String result = (queue == null) ? null : queue.poll();
+        if (result == null){
+            queueMap.remove(aCallId);
+        }
+        return result;
+    }
+
+    /** Tokenizes each value on the given delimiter characters (tokens trimmed, empty tokens dropped) into one list. */
+    private static LinkedList<String> tokenize(List<String> values, String delimiters){
+        LinkedList<String> tokens = new LinkedList<String>();
+        for (String value: values){
+            String[] parts = StringUtils.tokenizeToStringArray(value, delimiters, true, true);
+            if (parts != null){ // a null value may yield null here (Spring versions differ)
+                tokens.addAll(Arrays.asList(parts));
+            }
+        }
+        return tokens;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java
new file mode 100644
index 0000000..7cc9927
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java
@@ -0,0 +1,209 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.dom4j.Node;
+
+import eu.dnetlib.common.profile.Resource;
+import eu.dnetlib.common.utils.XMLUtils;
+
+/**
+ * @author jochen
+ *
+ */
+public class Vocabulary implements IVocabulary{
+
+ private List terms;
+ private Map encodingMap;
+ private Resource resource;
+ private boolean isCaseSensitive = true;
+ private String delimiter = null;
+ private String name = null;
+
+ /**
+  * @return the list of terms held by this vocabulary (the internal list itself, not a copy)
+  */
+ public List getTerms() {
+     return this.terms;
+ }
+
+ /**
+ * @param terms the list of terms to hold; the reference is stored as given, without copying
+ */
+ public void setTerms(List terms) {
+ this.terms = terms;
+ }
+
+ /**
+  * @return the name assigned through {@code setName}, or {@code null} if none has been assigned
+  */
+ public String getName() {
+     return this.name;
+ }
+
+ /**
+ * @param name the name to store for this vocabulary; returned unchanged by getName()
+ */
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getVocabularyName(){ // resolved from the backing resource on every call
+     return this.resource.getValue("//VOCABULARY_NAME");
+ }
+
+ /**
+ * returns the normalized, encoded String for a given key if found, otherwise a special value -depending on the vocabulary- is returned indicating that it couldn't be normalized
+ * @param key a list of Strings to encode
+ * @return a normalized, encoded String
+ */
+ @Override
+ public String encoding(List keys)throws ProcessingException{
+ // take the first best
+ for (String key: keys){
+ key = key.trim();
+ if (!isCaseSensitive)
+ key = key.toLowerCase();
+ if (encodingMap.containsKey(key))
+ return encodingMap.get(key);
+ }
+ if (encodingMap.containsKey("Unknown") || encodingMap.containsKey("unknown")){
+ if (isCaseSensitive) return encodingMap.get("Unknown");
+ else return encodingMap.get("unknown");
+ }else{
+ if (isCaseSensitive) return encodingMap.get("Undetermined");
+ else return encodingMap.get("undetermined");
+ }
+ }
+
+ /** Simple holder for a term's code, its name and the list of synonyms added to it. */
+ class Term{
+     String code;
+     String name;
+     List<String> synonyms = new LinkedList<String>();
+
+     void addSynonym(String synonym){
+         synonyms.add(synonym);
+     }
+     List<String> getSynonyms(){
+         return synonyms;
+     }
+ }
+
+
+ /**
+ * init the encoding with the given list of term parameters
+ * @param termList list of parameters with expected key:value pairs 'name':string, 'encoding':string, 'synonyms':list
+ */
+ @SuppressWarnings("unchecked")
+ public void setResource(List