Cleanup of the transformation functions and their related tests

This commit is contained in:
Claudio Atzori 2021-02-24 15:07:59 +01:00
parent fc3fa5e343
commit 9c899f4433
6 changed files with 51 additions and 67 deletions

View File

@ -4,7 +4,10 @@ package eu.dnetlib.dhp.transformation.xslt;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import net.sf.saxon.s9api.*; import net.sf.saxon.s9api.*;
import scala.Serializable;
import java.io.Serializable;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
public class Cleaner implements ExtensionFunction, Serializable { public class Cleaner implements ExtensionFunction, Serializable {
@ -16,7 +19,7 @@ public class Cleaner implements ExtensionFunction, Serializable {
@Override @Override
public QName getName() { public QName getName() {
return new QName("http://eu/dnetlib/transform/extension", "clean"); return new QName(QNAME_BASE_URI + "/clean", "clean");
} }
@Override @Override

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.transformation.xslt; package eu.dnetlib.dhp.transformation.xslt;
import java.io.Serializable;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.*; import java.util.*;
@ -8,7 +9,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.sf.saxon.s9api.*; import net.sf.saxon.s9api.*;
import scala.Serializable;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
public class DateCleaner implements ExtensionFunction, Serializable { public class DateCleaner implements ExtensionFunction, Serializable {
@ -91,7 +93,7 @@ public class DateCleaner implements ExtensionFunction, Serializable {
@Override @Override
public QName getName() { public QName getName() {
return new QName("http://eu/dnetlib/trasform/dates", "dateISO"); return new QName(QNAME_BASE_URI + "/dateISO", "dateISO");
} }
@Override @Override

View File

@ -15,6 +15,8 @@ import net.sf.saxon.s9api.*;
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> { public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
private final AggregationCounter aggregationCounter; private final AggregationCounter aggregationCounter;
private final String transformationRule; private final String transformationRule;

View File

@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -35,26 +34,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class TransformationJobTest extends AbstractVocabularyTest { public class TransformationJobTest extends AbstractVocabularyTest {
private static SparkSession spark;
@BeforeAll
public static void beforeAll() throws IOException, ISLookUpException {
SparkConf conf = new SparkConf();
conf.setAppName(TransformationJobTest.class.getSimpleName());
conf.setMaster("local");
spark = SparkSession.builder().config(conf).getOrCreate();
}
@BeforeEach @BeforeEach
public void setUp() throws IOException, ISLookUpException { public void setUp() throws IOException, ISLookUpException {
setUpVocabulary(); setUpVocabulary();
} }
@AfterAll
public static void afterAll() {
spark.stop();
}
@Test @Test
@DisplayName("Test Date cleaner") @DisplayName("Test Date cleaner")
public void testDateCleaner() throws Exception { public void testDateCleaner() throws Exception {
@ -82,68 +66,61 @@ public class TransformationJobTest extends AbstractVocabularyTest {
// Print the record // Print the record
System.out.println(result.getBody()); System.out.println(result.getBody());
// TODO Create significant Assert // TODO Create significant Assert
} }
@DisplayName("Test TransformSparkJobNode.main")
@Test @Test
@DisplayName("Test TransformSparkJobNode.main")
public void transformTest(@TempDir Path testDir) throws Exception { public void transformTest(@TempDir Path testDir) throws Exception {
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile(); SparkConf conf = new SparkConf();
final String mdstore_output = testDir.toString() + "/version"; conf.setAppName(TransformationJobTest.class.getSimpleName());
conf.setMaster("local");
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl"); try(SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) {
final Map<String, String> parameters = Stream.of(new String[][] { final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
{ final String mdstore_output = testDir.toString() + "/version";
"dateOfTransformation", "1234"
},
{
"transformationPlugin", "XSLT_TRANSFORM"
},
{
"transformationRuleId", "simpleTRule"
},
}).collect(Collectors.toMap(data -> data[0], data -> data[1])); mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output); final Map<String, String> parameters = Stream.of(new String[][]{
{
"dateOfTransformation", "1234"
},
{
"transformationPlugin", "XSLT_TRANSFORM"
},
{
"transformationRuleId", "simpleTRule"
},
// TODO introduce useful assertions }).collect(Collectors.toMap(data -> data[0], data -> data[1]));
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class); TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
final Dataset<MetadataRecord> mOutput = spark
.read()
.format("parquet")
.load(mdstore_output + MDSTORE_DATA_PATH)
.as(encoder);
final Long total = mOutput.count(); // TODO introduce useful assertions
final long recordTs = mOutput final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234) final Dataset<MetadataRecord> mOutput = spark
.count(); .read()
.format("parquet")
.load(mdstore_output + MDSTORE_DATA_PATH)
.as(encoder);
final long recordNotEmpty = mOutput final Long total = mOutput.count();
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
.count();
assertEquals(total, recordTs); final long recordTs = mOutput
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
.count();
assertEquals(total, recordNotEmpty); final long recordNotEmpty = mOutput
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
.count();
} assertEquals(total, recordTs);
@Test assertEquals(total, recordNotEmpty);
public void tryLoadFolderOnCP() throws Exception { }
final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
System.out.println("path = " + path);
Path tempDirWithPrefix = Files.createTempDirectory("mdstore_output");
System.out.println(tempDirWithPrefix.toFile().getAbsolutePath());
Files.deleteIfExists(tempDirWithPrefix);
} }
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception { private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {

View File

@ -1,7 +1,7 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:vocabulary="http://eu/dnetlib/transform/extension" xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
version="2.0" version="2.0"
exclude-result-prefixes="xsl vocabulary"> exclude-result-prefixes="xsl vocabulary">

View File

@ -3,8 +3,8 @@
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:vocabulary="http://eu/dnetlib/trasform/extension" xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/trasform/dates" xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
exclude-result-prefixes="xsl vocabulary dateCleaner"> exclude-result-prefixes="xsl vocabulary dateCleaner">
<xsl:param name="varOfficialName"/> <xsl:param name="varOfficialName"/>