Implemented new Transformation using spark

3 years ago · 184e7b3856
parent a54848a59c
commit 184e7b3856
13 changed files with 2706 additions and 303 deletions
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationCounter.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregationCounter.java
@ -0,0 +1,45 @@
+package eu.dnetlib.dhp.aggregation.common;
+
+import org.apache.spark.util.LongAccumulator;
+
+import java.io.Serializable;
+
+
+public class AggregationCounter implements Serializable {
+    private LongAccumulator totalItems;
+    private LongAccumulator errorItems;
+    private LongAccumulator processedItems;
+
+    public AggregationCounter() {
+    }
+
+    public AggregationCounter(LongAccumulator totalItems, LongAccumulator errorItems, LongAccumulator processedItems) {
+        this.totalItems = totalItems;
+        this.errorItems = errorItems;
+        this.processedItems = processedItems;
+    }
+
+    public LongAccumulator getTotalItems() {
+        return totalItems;
+    }
+
+    public void setTotalItems(LongAccumulator totalItems) {
+        this.totalItems = totalItems;
+    }
+
+    public LongAccumulator getErrorItems() {
+        return errorItems;
+    }
+
+    public void setErrorItems(LongAccumulator errorItems) {
+        this.errorItems = errorItems;
+    }
+
+    public LongAccumulator getProcessedItems() {
+        return processedItems;
+    }
+
+    public void setProcessedItems(LongAccumulator processedItems) {
+        this.processedItems = processedItems;
+    }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/DnetTransformationException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/DnetTransformationException.java
@ -0,0 +1,28 @@
+package eu.dnetlib.dhp.transformation;
+
+public class DnetTransformationException extends Exception {
+
+    public DnetTransformationException() {
+        super();
+    }
+
+    public DnetTransformationException(
+            final String message,
+            final Throwable cause,
+            final boolean enableSuppression,
+            final boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+
+    public DnetTransformationException(final String message, final Throwable cause) {
+        super(message, cause);
+    }
+
+    public DnetTransformationException(final String message) {
+        super(message);
+    }
+
+    public DnetTransformationException(final Throwable cause) {
+        super(cause);
+    }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
@ -1,74 +0,0 @@
-
-package eu.dnetlib.dhp.transformation;
-
-import java.io.ByteArrayInputStream;
-import java.io.StringWriter;
-import java.util.Map;
-
-import javax.xml.transform.stream.StreamSource;
-
-import org.apache.spark.api.java.function.MapFunction;
-import org.apache.spark.util.LongAccumulator;
-
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.transformation.functions.Cleaner;
-import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
-import net.sf.saxon.s9api.*;
-
-public class TransformFunction implements MapFunction<MetadataRecord, MetadataRecord> {
-
-	private final LongAccumulator totalItems;
-	private final LongAccumulator errorItems;
-	private final LongAccumulator transformedItems;
-	private final String transformationRule;
-	private final Cleaner cleanFunction;
-
-	private final long dateOfTransformation;
-
-	public TransformFunction(
-		LongAccumulator totalItems,
-		LongAccumulator errorItems,
-		LongAccumulator transformedItems,
-		final String transformationRule,
-		long dateOfTransformation,
-		final Map<String, Vocabulary> vocabularies)
-		throws Exception {
-		this.totalItems = totalItems;
-		this.errorItems = errorItems;
-		this.transformedItems = transformedItems;
-		this.transformationRule = transformationRule;
-		this.dateOfTransformation = dateOfTransformation;
-		cleanFunction = new Cleaner(vocabularies);
-	}
-
-	@Override
-	public MetadataRecord call(MetadataRecord value) {
-		totalItems.add(1);
-		try {
-			Processor processor = new Processor(false);
-			processor.registerExtensionFunction(cleanFunction);
-			final XsltCompiler comp = processor.newXsltCompiler();
-			XsltExecutable xslt = comp
-				.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
-			XdmNode source = processor
-				.newDocumentBuilder()
-				.build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
-			XsltTransformer trans = xslt.load();
-			trans.setInitialContextNode(source);
-			final StringWriter output = new StringWriter();
-			Serializer out = processor.newSerializer(output);
-			out.setOutputProperty(Serializer.Property.METHOD, "xml");
-			out.setOutputProperty(Serializer.Property.INDENT, "yes");
-			trans.setDestination(out);
-			trans.transform();
-			final String xml = output.toString();
-			value.setBody(xml);
-			value.setDateOfTransformation(dateOfTransformation);
-			transformedItems.add(1);
-			return value;
-		} catch (Throwable e) {
-			errorItems.add(1);
-			return null;
-		}
-	}
-}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
@ -9,9 +9,15 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;

-import org.apache.commons.cli.*;
+import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoder;
 import org.apache.spark.sql.Encoders;
@ -25,9 +31,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob;
 import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
 import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import eu.dnetlib.message.Message;
@ -57,65 +61,39 @@ public class TransformSparkJobNode {

 		final String inputPath = parser.get("input");
 		final String outputPath = parser.get("output");
+		// TODO this variable will be used after implementing Messaging with DNet Aggregator
 		final String workflowId = parser.get("workflowId");
-		final String trasformationRule = extractXSLTFromTR(
-			Objects.requireNonNull(DHPUtils.decompressString(parser.get("transformationRule"))));

-		final String rabbitUser = parser.get("rabbitUser");
-		final String rabbitPassword = parser.get("rabbitPassword");
-		final String rabbitHost = parser.get("rabbitHost");
-		final String rabbitReportQueue = parser.get("rabbitReportQueue");
-		final long dateOfCollection = new Long(parser.get("dateOfCollection"));
-		final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
+		final String isLookupUrl = parser.get("isLookupUrl");
+		log.info(String.format("isLookupUrl: %s", isLookupUrl));
+
+		final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);

 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
 			conf,
 			isSparkSessionManaged,
-			spark -> {
-				final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
-				final Dataset<MetadataRecord> mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder);
-				final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
-				final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
-				final LongAccumulator transformedItems = spark.sparkContext().longAccumulator("transformedItems");
-				final Map<String, Vocabulary> vocabularies = new HashMap<>();
-				vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
-				final TransformFunction transformFunction = new TransformFunction(
-					totalItems,
-					errorItems,
-					transformedItems,
-					trasformationRule,
-					dateOfCollection,
-					vocabularies);
-				mdstoreInput.map(transformFunction, encoder).write().format("parquet").save(outputPath);
-				if (rabbitHost != null) {
-					System.out.println("SEND FINAL REPORT");
-					final Map<String, String> reportMap = new HashMap<>();
-					reportMap.put("inputItem", "" + totalItems.value());
-					reportMap.put("invalidRecords", "" + errorItems.value());
-					reportMap.put("mdStoreSize", "" + transformedItems.value());
-					System.out.println(new Message(workflowId, "Transform", MessageType.REPORT, reportMap));
-					if (!test) {
-						final MessageManager manager = new MessageManager(rabbitHost, rabbitUser, rabbitPassword, false,
-							false,
-							null);
-						manager
-							.sendMessage(
-								new Message(workflowId, "Transform", MessageType.REPORT, reportMap),
-								rabbitReportQueue,
-								true,
-								false);
-						manager.close();
-					}
-				}
-			});
-
+			spark -> transformRecords(parser.getObjectMap(), isLookupService, spark, inputPath, outputPath));
 	}

-	private static String extractXSLTFromTR(final String tr) throws DocumentException {
-		SAXReader reader = new SAXReader();
-		Document document = reader.read(new ByteArrayInputStream(tr.getBytes()));
-		Node node = document.selectSingleNode("//CODE/*[local-name()='stylesheet']");
-		return node.asXML();
+
+	public static void transformRecords(final Map<String,String>args, final ISLookUpService isLookUpService, final SparkSession spark, final String inputPath, final String outputPath) throws DnetTransformationException {
+
+		final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
+		final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
+		final LongAccumulator transformedItems = spark.sparkContext().longAccumulator("transformedItems");
+		final AggregationCounter ct = new AggregationCounter(totalItems, errorItems,transformedItems );
+		final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
+		final Dataset<MetadataRecord> mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder);
+		final MapFunction<MetadataRecord, MetadataRecord> XSLTTransformationFunction = TransformationFactory.getTransformationPlugin(args,ct, isLookUpService);
+		mdstoreInput.map(XSLTTransformationFunction, encoder).write().save(outputPath);
+
+		log.info("Transformed item "+ ct.getProcessedItems().count());
+		log.info("Total item "+ ct.getTotalItems().count());
+		log.info("Transformation Error item "+ ct.getErrorItems().count());
 	}
+
+
+
+
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java
@ -0,0 +1,62 @@
+package eu.dnetlib.dhp.transformation;
+
+import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.spark.api.java.function.MapFunction;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+public class TransformationFactory {
+
+    private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class);
+    public static final  String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//TITLE = \"%s\" return $x//CODE/text()";
+
+
+    public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin(final Map<String,String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) throws DnetTransformationException {
+
+        try {
+            final String transformationPlugin = jobArgument.get("transformationPlugin");
+
+            log.info("Transformation plugin required "+transformationPlugin);
+            switch (transformationPlugin) {
+                case "XSLT_TRANSFORM": {
+                    final String transformationRuleName = jobArgument.get("transformationRule");
+                    if (StringUtils.isBlank(transformationRuleName))
+                        throw new DnetTransformationException("Missing Parameter transformationRule");
+                    final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService);
+
+                    final String transformationRule = queryTransformationRuleFromIS(transformationRuleName, isLookupService);
+
+                    final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation"));
+                    return new XSLTTransformationFunction(counters,transformationRule,dateOfTransformation,vocabularies);
+
+                }
+                default:
+                    throw new DnetTransformationException("transformation plugin does not exists for " + transformationPlugin);
+
+            }
+
+        } catch (Throwable e) {
+            throw new DnetTransformationException(e);
+        }
+    }
+
+    private static String queryTransformationRuleFromIS(final String transformationRuleName, final ISLookUpService isLookUpService) throws  Exception {
+        final String query = String.format(TRULE_XQUERY, transformationRuleName);
+        log.info("asking query to IS: "+ query);
+        List<String> result = isLookUpService.quickSearchProfile(query);
+
+        if (result==null || result.isEmpty())
+            throw new DnetTransformationException("Unable to find transformation rule with name: "+ transformationRuleName);
+        return result.get(0);
+    }
+
+
+}
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/functions/Cleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/functions/Cleaner.java
@ -1,19 +1,17 @@

-package eu.dnetlib.dhp.transformation.functions;
+package eu.dnetlib.dhp.transformation.xslt;

-import java.util.Map;
-import java.util.Optional;

-import eu.dnetlib.dhp.transformation.vocabulary.Term;
-import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import net.sf.saxon.s9api.*;
 import scala.Serializable;

 public class Cleaner implements ExtensionFunction, Serializable {

-	private final Map<String, Vocabulary> vocabularies;
+	private final VocabularyGroup vocabularies;

-	public Cleaner(Map<String, Vocabulary> vocabularies) {
+	public Cleaner(final VocabularyGroup vocabularies) {
 		this.vocabularies = vocabularies;
 	}

@ -39,14 +37,9 @@ public class Cleaner implements ExtensionFunction, Serializable {
 	public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
 		final String currentValue = xdmValues[0].itemAt(0).getStringValue();
 		final String vocabularyName = xdmValues[1].itemAt(0).getStringValue();
-		Optional<Term> cleanedValue = vocabularies
-			.get(vocabularyName)
-			.getTerms()
-			.stream()
-			.filter(it -> it.getNativeName().equalsIgnoreCase(currentValue))
-			.findAny();
+		Qualifier cleanedValue = vocabularies.getSynonymAsQualifier(vocabularyName, currentValue);

 		return new XdmAtomicValue(
-			cleanedValue.isPresent() ? cleanedValue.get().getCode() : currentValue);
+			cleanedValue != null  ? cleanedValue.getClassid() : currentValue);
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
@ -0,0 +1,66 @@
+
+package eu.dnetlib.dhp.transformation.xslt;
+
+import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import net.sf.saxon.s9api.*;
+import org.apache.spark.api.java.function.MapFunction;
+
+import javax.xml.transform.stream.StreamSource;
+import java.io.ByteArrayInputStream;
+import java.io.StringWriter;
+
+public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
+
+    private final AggregationCounter aggregationCounter;
+
+    private final String transformationRule;
+
+    private final Cleaner cleanFunction;
+
+    private final long dateOfTransformation;
+
+    public XSLTTransformationFunction(
+            final AggregationCounter aggregationCounter,
+            final String transformationRule,
+            long dateOfTransformation,
+            final VocabularyGroup vocabularies)
+            throws Exception {
+        this.aggregationCounter = aggregationCounter;
+        this.transformationRule = transformationRule;
+        this.dateOfTransformation = dateOfTransformation;
+        cleanFunction = new Cleaner(vocabularies);
+    }
+
+    @Override
+    public MetadataRecord call(MetadataRecord value) {
+        aggregationCounter.getTotalItems().add(1);
+        try {
+            Processor processor = new Processor(false);
+            processor.registerExtensionFunction(cleanFunction);
+            final XsltCompiler comp = processor.newXsltCompiler();
+            XsltExecutable xslt = comp
+                    .compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
+            XdmNode source = processor
+                    .newDocumentBuilder()
+                    .build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
+            XsltTransformer trans = xslt.load();
+            trans.setInitialContextNode(source);
+            final StringWriter output = new StringWriter();
+            Serializer out = processor.newSerializer(output);
+            out.setOutputProperty(Serializer.Property.METHOD, "xml");
+            out.setOutputProperty(Serializer.Property.INDENT, "yes");
+            trans.setDestination(out);
+            trans.transform();
+            final String xml = output.toString();
+            value.setBody(xml);
+            value.setDateOfTransformation(dateOfTransformation);
+            aggregationCounter.getProcessedItems().add(1);
+            return value;
+        } catch (Throwable e) {
+            aggregationCounter.getErrorItems().add(1);
+            return null;
+        }
+    }
+}
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/transformation/transformation_input_parameters.json
@ -7,7 +7,7 @@
  },
  {
    "paramName": "d",
-    "paramLongName": "dateOfCollection",
+    "paramLongName": "dateOfTransformation",
    "paramDescription": "the date when the record has been stored",
    "paramRequired": true
  },
@ -36,39 +36,9 @@
    "paramRequired": true
  },
  {
-    "paramName": "ru",
-    "paramLongName": "rabbitUser",
-    "paramDescription": "the user to connect with RabbitMq for messaging",
+    "paramName": "tp",
+    "paramLongName": "transformationPlugin",
+    "paramDescription": "the transformation plugin to apply",
    "paramRequired": true
-  },
-  {
-    "paramName": "rp",
-    "paramLongName": "rabbitPassword",
-    "paramDescription": "the password to connect with RabbitMq for messaging",
-    "paramRequired": true
-  },
-  {
-    "paramName": "rh",
-    "paramLongName": "rabbitHost",
-    "paramDescription": "the host of the RabbitMq server",
-    "paramRequired": true
-  },
-  {
-    "paramName": "ro",
-    "paramLongName": "rabbitOngoingQueue",
-    "paramDescription": "the name of the ongoing queue",
-    "paramRequired": true
-  },
-  {
-    "paramName": "rr",
-    "paramLongName": "rabbitReportQueue",
-    "paramDescription": "the name of the report queue",
-    "paramRequired": true
-  },
-  {
-    "paramName": "t",
-    "paramLongName": "isTest",
-    "paramDescription": "the name of the report queue",
-    "paramRequired": false
  }
 ]
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
@ -2,15 +2,23 @@
 package eu.dnetlib.dhp.transformation;

 import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.mockito.Mockito.lenient;

+import java.io.IOException;
 import java.io.StringWriter;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;

 import javax.xml.transform.stream.StreamSource;

+import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
+import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
+import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import org.apache.commons.io.IOUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.SparkSession;
@ -18,28 +26,34 @@ import org.apache.spark.util.LongAccumulator;
 import org.dom4j.Document;
 import org.dom4j.Node;
 import org.dom4j.io.SAXReader;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.DisplayName;
-import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.*;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.junit.jupiter.api.io.TempDir;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
-
 import eu.dnetlib.dhp.collection.CollectionJobTest;
 import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.transformation.functions.Cleaner;
-import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
-import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
-import eu.dnetlib.dhp.utils.DHPUtils;
-import net.sf.saxon.s9api.*;

@ExtendWith(MockitoExtension.class)
 public class TransformationJobTest {

 	private static SparkSession spark;

+	@Mock
+	private ISLookUpService isLookUpService;
+
+	private VocabularyGroup vocabularies;
+
+	@BeforeEach
+	public void setUp() throws ISLookUpException, IOException {
+		lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
+
+		lenient()
+				.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
+				.thenReturn(synonyms());
+		vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
+	}
+
 	@BeforeAll
 	public static void beforeAll() {
 		SparkConf conf = new SparkConf();
@ -53,64 +67,51 @@ public class TransformationJobTest {
 		spark.stop();
 	}

-	@Mock
-	private LongAccumulator accumulator;

 	@Test
+	@DisplayName("Test Transform Single XML using XSLTTransformator")
 	public void testTransformSaxonHE() throws Exception {

-		Map<String, Vocabulary> vocabularies = new HashMap<>();
-		vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
-		Cleaner cleanFunction = new Cleaner(vocabularies);
-		Processor proc = new Processor(false);
-		proc.registerExtensionFunction(cleanFunction);
-		final XsltCompiler comp = proc.newXsltCompiler();
-		XsltExecutable exp = comp
-			.compile(
-				new StreamSource(
-					this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/ext_simple.xsl")));
-		XdmNode source = proc
-			.newDocumentBuilder()
-			.build(
-				new StreamSource(
-					this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input.xml")));
-		XsltTransformer trans = exp.load();
-		trans.setInitialContextNode(source);
-		final StringWriter output = new StringWriter();
-		Serializer out = proc.newSerializer(output);
-		out.setOutputProperty(Serializer.Property.METHOD, "xml");
-		out.setOutputProperty(Serializer.Property.INDENT, "yes");
-		trans.setDestination(out);
-		trans.transform();
-		System.out.println(output.toString());
+		// We Set the input Record getting the XML from the classpath
+		final MetadataRecord mr = new MetadataRecord();
+		mr.setBody(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input.xml")));
+
+
+		// We Load the XSLT trasformation Rule from the classpath
+		XSLTTransformationFunction tr = loadTransformationRule("/eu/dnetlib/dhp/transform/ext_simple.xsl");
+
+		//Print the record
+		System.out.println(tr.call(mr).getBody());
+		//TODO Create significant Assert
+
 	}

+
+
+
 	@DisplayName("Test TransformSparkJobNode.main")
 	@Test
 	public void transformTest(@TempDir Path testDir) throws Exception {
+
 		final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
 		final String mdstore_output = testDir.toString() + "/version";
-		final String xslt = DHPUtils
-			.compressString(
-				IOUtils
-					.toString(
-						this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/tr.xml")));
-		TransformSparkJobNode
-			.main(
-				new String[] {
-					"-issm", "true",
-					"-i", mdstore_input,
-					"-o", mdstore_output,
-					"-d", "1",
-					"-w", "1",
-					"-tr", xslt,
-					"-t", "true",
-					"-ru", "",
-					"-rp", "",
-					"-rh", "",
-					"-ro", "",
-					"-rr", ""
-				});
+
+
+		mockupTrasformationRule("simpleTRule","/eu/dnetlib/dhp/transform/ext_simple.xsl");
+
+//		final String arguments = "-issm true -i %s -o %s -d 1 -w 1 -tp XSLT_TRANSFORM -tr simpleTRule";
+
+		final Map<String,String > parameters =  Stream.of(new String[][] {
+				{ "dateOfTransformation", "1234" },
+				{ "transformationPlugin", "XSLT_TRANSFORM" },
+				{ "transformationRule", "simpleTRule" },
+
+		}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
+
+		TransformSparkJobNode.transformRecords(parameters,isLookUpService,spark,mdstore_input, mdstore_output);
+
+
+

 		// TODO introduce useful assertions
 	}
@ -127,39 +128,27 @@ public class TransformationJobTest {
 		Files.deleteIfExists(tempDirWithPrefix);
 	}

-	@Test
-	public void testTransformFunction() throws Exception {
-		SAXReader reader = new SAXReader();
-		Document document = reader.read(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/tr.xml"));
-		Node node = document.selectSingleNode("//CODE/*[local-name()='stylesheet']");
-		final String xslt = node.asXML();
-		Map<String, Vocabulary> vocabularies = new HashMap<>();
-		vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
-
-		TransformFunction tf = new TransformFunction(accumulator, accumulator, accumulator, xslt, 1, vocabularies);
-
-		MetadataRecord record = new MetadataRecord();
-		record
-			.setBody(
-				IOUtils
-					.toString(
-						this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input.xml")));
-
-		final MetadataRecord result = tf.call(record);
-		assertNotNull(result.getBody());
-
-		System.out.println(result.getBody());
-	}

-	@Test
-	public void extractTr() throws Exception {
+	private void mockupTrasformationRule(final String trule, final String path)throws Exception {
+		final String trValue = IOUtils.toString(this.getClass().getResourceAsStream(path));

-		final String xmlTr = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/tr.xml"));
+		lenient().when(isLookUpService.quickSearchProfile(String.format(TransformationFactory.TRULE_XQUERY,trule)))
+				.thenReturn(Collections.singletonList(trValue));
+	}
+
+	private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
+		final String trValue = IOUtils.toString(this.getClass().getResourceAsStream(path));
+		final LongAccumulator la = new LongAccumulator();
+		return new XSLTTransformationFunction(new AggregationCounter(la,la,la),trValue, 0,vocabularies);
+	}

-		SAXReader reader = new SAXReader();
-		Document document = reader.read(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/tr.xml"));
-		Node node = document.selectSingleNode("//CODE/*[local-name()='stylesheet']");
+	private List<String> vocs() throws IOException {
+		return IOUtils
+				.readLines(TransformationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/terms.txt"));
+	}

-		System.out.println(node.asXML());
+	private List<String> synonyms() throws IOException {
+		return IOUtils
+				.readLines(TransformationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/transform/synonyms.txt"));
 	}
 }
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl
@ -1,15 +1,16 @@
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:oai="http://www.openarchives.org/OAI/2.0/"
                xmlns:oaf="http://namespace.openaire.eu/oaf"
-                xmlns:dnetFunction="http://eu/dnetlib/trasform/extension"
+                xmlns:vocabulary="http://eu/dnetlib/trasform/extension"
+                xmlns:dr="http://www.driver-repository.eu/namespace/dr"
                version="2.0"
-                exclude-result-prefixes="xsl">
+                exclude-result-prefixes="xsl vocabulary">
    <xsl:template match="/">
        <oai:record>
            <xsl:copy-of select="//oai:header"/>
            <metadata>
-                <xsl:for-each select="//*[local-name()='subject']">
-                    <subject><xsl:value-of select="dnetFunction:clean(.,'dnet:languages')"/></subject>
+                <xsl:for-each select="//oai:set">
+                    <dr:CobjCategory><xsl:value-of select="vocabulary:clean(.,'dnet:publication_resource')"/></dr:CobjCategory>
                </xsl:for-each>
            </metadata>
            <oaf:about>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml
@ -1,37 +1,68 @@
-<record xmlns="http://www.openarchives.org/OAI/2.0/">
-    <header>
-        <identifier>oai:research.chalmers.se:243692</identifier>
-        <datestamp>2018-01-25T18:04:43Z</datestamp>
-        <setSpec>openaire</setSpec>
-    </header>
-    <metadata>
-        <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
-            <dc:title>Incipient Berezinskii-Kosterlitz-Thouless transition in two-dimensional coplanar Josephson junctions</dc:title>
-            <dc:identifier>https://research.chalmers.se/en/publication/243692</dc:identifier>
-            <dc:date>2016</dc:date>
-            <dc:creator>Massarotti, D.</dc:creator>
-            <dc:creator>Jouault, B.</dc:creator>
-            <dc:creator>Rouco, V.</dc:creator>
-            <dc:creator>Charpentier, Sophie</dc:creator>
-            <dc:creator>Bauch, Thilo</dc:creator>
-            <dc:creator>Michon, A.</dc:creator>
-            <dc:creator>De Candia, A.</dc:creator>
-            <dc:creator>Lucignano, P.</dc:creator>
-            <dc:creator>Lombardi, Floriana</dc:creator>
-            <dc:creator>Tafuri, F.</dc:creator>
-            <dc:creator>Tagliacozzo, A.</dc:creator>
-            <dc:subject>Acoli</dc:subject>
-            <dc:subject>Abkhazian</dc:subject>
-            <dc:subject>Condensed Matter Physics</dc:subject>
-            <dc:description>Superconducting hybrid junctions are revealing a variety of effects. Some of them are due to the special layout of these devices, which often use a coplanar configuration with relatively large barrier channels and the possibility of hosting Pearl vortices. A Josephson junction with a quasi-ideal two-dimensional barrier has been realized by growing graphene on SiC with Al electrodes. Chemical vapor deposition offers centimeter size monolayer areas where it is possible to realize a comparative analysis of different devices with nominally the same barrier. In samples with a graphene gap below 400 nm, we have found evidence of Josephson coherence in the presence of an incipient Berezinskii-Kosterlitz-Thouless transition. When the magnetic field is cycled, a remarkable hysteretic collapse and revival of the Josephson supercurrent occurs. Similar hysteresis are found in granular systems and are usually justified within the Bean critical state model (CSM). We show that the CSM, with appropriate account for the low-dimensional geometry, can partly explain the odd features measured in these junctions.</dc:description>
-            <dc:relation>info:eu-repo/grantAgreement/EC/FP7/604391//Graphene-Based Revolutions in ICT And Beyond (Graphene Flagship)/</dc:relation>
-            <dc:relation>info:eu-repo/semantics/altIdentifier/doi/10.1103/PhysRevB.94.054525</dc:relation>
-            <dc:type>info:eu-repo/semantics/article</dc:type>
-            <dc:source>Physical Review B vol.94(2016)</dc:source>
-            <dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
+<?xml version="1.0" encoding="UTF-8"?>
+<oai:record xmlns="http://namespace.openaire.eu/"
+            xmlns:dc="http://purl.org/dc/elements/1.1/"
+            xmlns:dr="http://www.driver-repository.eu/namespace/dr"
+            xmlns:dri="http://www.driver-repository.eu/namespace/dri"
+            xmlns:oaf="http://namespace.openaire.eu/oaf"
+            xmlns:oai="http://www.openarchives.org/OAI/2.0/"
+            xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <oai:header>
+        <dri:objIdentifier>od______2294::00029b7f0a2a7e090e55b625a9079d83</dri:objIdentifier>
+        <dri:recordIdentifier>oai:pub.uni-bielefeld.de:2578942</dri:recordIdentifier>
+        <dri:dateOfCollection>2018-11-23T15:15:33.974+01:00</dri:dateOfCollection>
+        <oaf:datasourceprefix>od______2294</oaf:datasourceprefix>
+        <identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:pub.uni-bielefeld.de:2578942</identifier>
+        <datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2018-07-24T13:01:16Z</datestamp>
+        <setSpec xmlns="http://www.openarchives.org/OAI/2.0/">conference</setSpec>
+        <setSpec xmlns="http://www.openarchives.org/OAI/2.0/">ddc:000</setSpec>
+        <setSpec xmlns="http://www.openarchives.org/OAI/2.0/">conferenceFtxt</setSpec>
+        <setSpec xmlns="http://www.openarchives.org/OAI/2.0/">driver</setSpec>
+        <setSpec xmlns="http://www.openarchives.org/OAI/2.0/">open_access</setSpec>
+    </oai:header>
+    <metadata xmlns="http://www.openarchives.org/OAI/2.0/">
+        <oai_dc:dc xmlns="http://www.openarchives.org/OAI/2.0/oai_dc/"
+                   xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+            <dc:title>Mobile recommendation agents making online use of visual attention information at the point of sale</dc:title>
+            <dc:creator>Pfeiffer, Thies</dc:creator>
+            <dc:creator>Pfeiffer, Jella</dc:creator>
+            <dc:creator>Meißner, Martin</dc:creator>
+            <dc:creator>Davis, Fred</dc:creator>
+            <dc:creator>Riedl, René</dc:creator>
+            <dc:creator>Jan, vom Brocke</dc:creator>
+            <dc:creator>Léger, Pierre-Majorique</dc:creator>
+            <dc:creator>Randolph, Adriane</dc:creator>
+            <dc:subject>Mobile Cognitive Assistance Systems
+                Information Systems</dc:subject>
+            <dc:subject>ddc:000</dc:subject>
+            <dc:description>We aim to utilize online information about visual attention for developing mobile recommendation agents (RAs) for use at the point of sale. Up to now, most RAs are focussed exclusively at personalization in an e-commerce setting. Very little is known, however, about mobile RAs that offer information and assistance at the point of sale based on individual-level feature based preference models (Murray and Häubl 2009). Current attempts provide information about products at the point of sale by manually scanning barcodes or using RFID (Kowatsch et al. 2011, Heijden 2005), e.g. using specific apps for smartphones. We argue that an online access to the current visual attention of the user offers a much larger potential. Integrating mobile eye tracking into ordinary glasses would yield a direct benefit of applying neuroscience methods in the user’s everyday life. First, learning from consumers’ attentional processes over time and adapting recommendations based on this learning allows us to provide very accurate and relevant recommendations, potentially increasing the perceived usefulness. Second, our proposed system needs little explicit user input (no scanning or navigation on screen) making it easy to use. Thus, instead of learning from click behaviour and past customer ratings, as it is the case in the e-commerce setting, the mobile RA learns from eye movements by participating online in every day decision processes. We argue that mobile RAs should be built based on current research in human judgment and decision making (Murray et al. 2010). In our project, we therefore follow a two-step approach: In the empirical basic research stream, we aim to understand the user’s interaction with the product shelf: the actions and patterns of user’s behaviour (eye movements, gestures, approaching a product closer) and their correspondence to the user’s informational needs. In the empirical system development stream, we create prototypes of mobile RAs and test experimentally the factors that influence the user’s adoption. For example, we suggest that a user’s involvement in the process, such as a need for exact nutritional information or for assistance (e.g., reading support for elderly) will influence the user’s intention to use such as system. The experiments are conducted both in our immersive virtual reality supermarket presented in a CAVE, where we can also easily display information to the user and track the eye movement in great accuracy, as well as in real-world supermarkets (see Figure 1), so that the findings can be better generalized to natural decision situations (Gidlöf et al. 2013). In a first pilot study with five randomly chosen participants in a supermarket, we evaluated which sort of mobile RAs consumers favour in order to get a first impression of the user’s acceptance of the technology. Figure 1 shows an excerpt of one consumer’s eye movements during a decision process. First results show long eye cascades and short fixations on many products in situations where users are uncertain and in need for support. Furthermore, we find a surprising acceptance of the technology itself throughout all ages (23 – 61 years). At the same time, consumers express serious fear of being manipulated by such a technology. For that reason, they strongly prefer the information to be provided by trusted third party or shared with family members and friends (see also Murray and Häubl 2009). Our pilot will be followed by a larger field experiment in March in order to learn more about factors that influence the user’s acceptance as well as the eye movement patterns that reflect typical phases of decision processes and indicate the need for support by a RA.</dc:description>
+            <dc:date>2013</dc:date>
+            <dc:type>info:eu-repo/semantics/conferenceObject</dc:type>
+            <dc:type>doc-type:conferenceObject</dc:type>
+            <dc:type>text</dc:type>
+            <dc:identifier>https://pub.uni-bielefeld.de/record/2578942</dc:identifier>
+            <dc:identifier>https://pub.uni-bielefeld.de/download/2578942/2602478</dc:identifier>
+            <dc:source>Pfeiffer T, Pfeiffer J, Meißner M. Mobile recommendation agents making online use of visual attention information at the point of sale. In: Davis F, Riedl R, Jan vom B, Léger P-M, Randolph A, eds. <em>Proceedings of the Gmunden Retreat on NeuroIS 2013</em>. 2013: 3-3.</dc:source>
            <dc:language>eng</dc:language>
-            <dc:audience>Researchers</dc:audience>
-            <dc:format>application/pdf</dc:format>
+            <dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
        </oai_dc:dc>
    </metadata>
-</record>
+    <about xmlns="">
+        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
+            <originDescription altered="true" harvestDate="2018-11-23T15:15:33.974+01:00">
+                <baseURL>http://pub.uni-bielefeld.de/oai</baseURL>
+                <identifier>oai:pub.uni-bielefeld.de:2578942</identifier>
+                <datestamp>2018-07-24T13:01:16Z</datestamp>
+                <metadataNamespace>http://www.openarchives.org/OAI/2.0/oai_dc/</metadataNamespace>
+            </originDescription>
+        </provenance>
+        <oaf:datainfo>
+            <oaf:inferred>false</oaf:inferred>
+            <oaf:deletedbyinference>false</oaf:deletedbyinference>
+            <oaf:trust>0.9</oaf:trust>
+            <oaf:inferenceprovenance/>
+            <oaf:provenanceaction classid="sysimport:crosswalk:repository"
+                                  classname="sysimport:crosswalk:repository"
+                                  schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
+        </oaf:datainfo>
+    </about>
+</oai:record>
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/synonyms.txt
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/synonyms.txt
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt