From 9c899f44335278053fb47524dd112ff476a15488 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 24 Feb 2021 15:07:59 +0100 Subject: [PATCH] cleanup on transformation functions and the relative tests --- .../dhp/transformation/xslt/Cleaner.java | 7 +- .../dhp/transformation/xslt/DateCleaner.java | 6 +- .../xslt/XSLTTransformationFunction.java | 2 + .../transformation/TransformationJobTest.java | 97 +++++++------------ .../eu/dnetlib/dhp/transform/ext_simple.xsl | 2 +- .../eu/dnetlib/dhp/transform/zenodo_tr.xslt | 4 +- 6 files changed, 51 insertions(+), 67 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/Cleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/Cleaner.java index 124f68325..50ffd304b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/Cleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/Cleaner.java @@ -4,7 +4,10 @@ package eu.dnetlib.dhp.transformation.xslt; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.oaf.Qualifier; import net.sf.saxon.s9api.*; -import scala.Serializable; + +import java.io.Serializable; + +import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; public class Cleaner implements ExtensionFunction, Serializable { @@ -16,7 +19,7 @@ public class Cleaner implements ExtensionFunction, Serializable { @Override public QName getName() { - return new QName("http://eu/dnetlib/transform/extension", "clean"); + return new QName(QNAME_BASE_URI + "/clean", "clean"); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 4e1a29b52..479dd9854 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.transformation.xslt; +import java.io.Serializable; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -8,7 +9,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import net.sf.saxon.s9api.*; -import scala.Serializable; + +import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; public class DateCleaner implements ExtensionFunction, Serializable { @@ -91,7 +93,7 @@ public class DateCleaner implements ExtensionFunction, Serializable { @Override public QName getName() { - return new QName("http://eu/dnetlib/trasform/dates", "dateISO"); + return new QName(QNAME_BASE_URI + "/dateISO", "dateISO"); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index 7d47cc84d..a813d84db 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -15,6 +15,8 @@ import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction { + public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; + private final AggregationCounter aggregationCounter; private final String transformationRule; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 091089eb9..50aa2ea08 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH; import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; import java.util.stream.Collectors; @@ -35,26 +34,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @ExtendWith(MockitoExtension.class) public class TransformationJobTest extends AbstractVocabularyTest { - private static SparkSession spark; - - @BeforeAll - public static void beforeAll() throws IOException, ISLookUpException { - SparkConf conf = new SparkConf(); - conf.setAppName(TransformationJobTest.class.getSimpleName()); - conf.setMaster("local"); - spark = SparkSession.builder().config(conf).getOrCreate(); - } - @BeforeEach public void setUp() throws IOException, ISLookUpException { setUpVocabulary(); } - @AfterAll - public static void afterAll() { - spark.stop(); - } - @Test @DisplayName("Test Date cleaner") public void testDateCleaner() throws Exception { @@ -82,68 +66,61 @@ public class TransformationJobTest extends AbstractVocabularyTest { // Print the record System.out.println(result.getBody()); // TODO Create significant Assert - } - @DisplayName("Test TransformSparkJobNode.main") @Test + @DisplayName("Test TransformSparkJobNode.main") public void transformTest(@TempDir Path testDir) throws Exception { - final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile(); - final String mdstore_output = testDir.toString() + "/version"; + SparkConf conf = new SparkConf(); + conf.setAppName(TransformationJobTest.class.getSimpleName()); + conf.setMaster("local"); - mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl"); + try(SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) { - final Map parameters = Stream.of(new String[][] { - { - "dateOfTransformation", "1234" - }, - { - "transformationPlugin", "XSLT_TRANSFORM" - }, - { - "transformationRuleId", "simpleTRule" - }, + final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile(); + final String mdstore_output = testDir.toString() + "/version"; - }).collect(Collectors.toMap(data -> data[0], data -> data[1])); + mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl"); - TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output); + final Map parameters = Stream.of(new String[][]{ + { + "dateOfTransformation", "1234" + }, + { + "transformationPlugin", "XSLT_TRANSFORM" + }, + { + "transformationRuleId", "simpleTRule" + }, - // TODO introduce useful assertions + }).collect(Collectors.toMap(data -> data[0], data -> data[1])); - final Encoder encoder = Encoders.bean(MetadataRecord.class); - final Dataset mOutput = spark - .read() - .format("parquet") - .load(mdstore_output + MDSTORE_DATA_PATH) - .as(encoder); + TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output); - final Long total = mOutput.count(); + // TODO introduce useful assertions - final long recordTs = mOutput - .filter((FilterFunction) p -> p.getDateOfTransformation() == 1234) - .count(); + final Encoder encoder = Encoders.bean(MetadataRecord.class); + final Dataset mOutput = spark + .read() + .format("parquet") + .load(mdstore_output + MDSTORE_DATA_PATH) + .as(encoder); - final long recordNotEmpty = mOutput - .filter((FilterFunction) p -> !StringUtils.isBlank(p.getBody())) - .count(); + final Long total = mOutput.count(); - assertEquals(total, recordTs); + final long recordTs = mOutput + .filter((FilterFunction) p -> p.getDateOfTransformation() == 1234) + .count(); - assertEquals(total, recordNotEmpty); + final long recordNotEmpty = mOutput + .filter((FilterFunction) p -> !StringUtils.isBlank(p.getBody())) + .count(); - } + assertEquals(total, recordTs); - @Test - public void tryLoadFolderOnCP() throws Exception { - final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile(); - System.out.println("path = " + path); - - Path tempDirWithPrefix = Files.createTempDirectory("mdstore_output"); - - System.out.println(tempDirWithPrefix.toFile().getAbsolutePath()); - - Files.deleteIfExists(tempDirWithPrefix); + assertEquals(total, recordNotEmpty); + } } private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl index e2a439315..8f8ce2270 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl @@ -1,7 +1,7 @@ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/zenodo_tr.xslt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/zenodo_tr.xslt index 23e57579b..9a02c9071 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/zenodo_tr.xslt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/zenodo_tr.xslt @@ -3,8 +3,8 @@ xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:oaf="http://namespace.openaire.eu/oaf" - xmlns:vocabulary="http://eu/dnetlib/trasform/extension" - xmlns:dateCleaner="http://eu/dnetlib/trasform/dates" + xmlns:vocabulary="http://eu/dnetlib/transform/clean" + xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO" xmlns:dr="http://www.driver-repository.eu/namespace/dr" exclude-result-prefixes="xsl vocabulary dateCleaner">