cleanup of the transformation functions and the related tests

This commit is contained in:
Claudio Atzori 2021-02-24 15:07:59 +01:00
parent fc3fa5e343
commit 9c899f4433
6 changed files with 51 additions and 67 deletions

View File

@ -4,7 +4,10 @@ package eu.dnetlib.dhp.transformation.xslt;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import net.sf.saxon.s9api.*;
import scala.Serializable;
import java.io.Serializable;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
public class Cleaner implements ExtensionFunction, Serializable {
@ -16,7 +19,7 @@ public class Cleaner implements ExtensionFunction, Serializable {
@Override
public QName getName() {
return new QName("http://eu/dnetlib/transform/extension", "clean");
return new QName(QNAME_BASE_URI + "/clean", "clean");
}
@Override

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.transformation.xslt;
import java.io.Serializable;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;
@ -8,7 +9,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.saxon.s9api.*;
import scala.Serializable;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
public class DateCleaner implements ExtensionFunction, Serializable {
@ -91,7 +93,7 @@ public class DateCleaner implements ExtensionFunction, Serializable {
@Override
public QName getName() {
return new QName("http://eu/dnetlib/trasform/dates", "dateISO");
return new QName(QNAME_BASE_URI + "/dateISO", "dateISO");
}
@Override

View File

@ -15,6 +15,8 @@ import net.sf.saxon.s9api.*;
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
private final AggregationCounter aggregationCounter;
private final String transformationRule;

View File

@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.stream.Collectors;
@ -35,26 +34,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@ExtendWith(MockitoExtension.class)
public class TransformationJobTest extends AbstractVocabularyTest {
private static SparkSession spark;
@BeforeAll
public static void beforeAll() throws IOException, ISLookUpException {
SparkConf conf = new SparkConf();
conf.setAppName(TransformationJobTest.class.getSimpleName());
conf.setMaster("local");
spark = SparkSession.builder().config(conf).getOrCreate();
}
@BeforeEach
public void setUp() throws IOException, ISLookUpException {
setUpVocabulary();
}
@AfterAll
public static void afterAll() {
spark.stop();
}
@Test
@DisplayName("Test Date cleaner")
public void testDateCleaner() throws Exception {
@ -82,68 +66,61 @@ public class TransformationJobTest extends AbstractVocabularyTest {
// Print the record
System.out.println(result.getBody());
// TODO Create significant Assert
}
@DisplayName("Test TransformSparkJobNode.main")
@Test
@DisplayName("Test TransformSparkJobNode.main")
public void transformTest(@TempDir Path testDir) throws Exception {
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
final String mdstore_output = testDir.toString() + "/version";
SparkConf conf = new SparkConf();
conf.setAppName(TransformationJobTest.class.getSimpleName());
conf.setMaster("local");
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
try(SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) {
final Map<String, String> parameters = Stream.of(new String[][] {
{
"dateOfTransformation", "1234"
},
{
"transformationPlugin", "XSLT_TRANSFORM"
},
{
"transformationRuleId", "simpleTRule"
},
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
final String mdstore_output = testDir.toString() + "/version";
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
final Map<String, String> parameters = Stream.of(new String[][]{
{
"dateOfTransformation", "1234"
},
{
"transformationPlugin", "XSLT_TRANSFORM"
},
{
"transformationRuleId", "simpleTRule"
},
// TODO introduce useful assertions
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mOutput = spark
.read()
.format("parquet")
.load(mdstore_output + MDSTORE_DATA_PATH)
.as(encoder);
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
final Long total = mOutput.count();
// TODO introduce useful assertions
final long recordTs = mOutput
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
.count();
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mOutput = spark
.read()
.format("parquet")
.load(mdstore_output + MDSTORE_DATA_PATH)
.as(encoder);
final long recordNotEmpty = mOutput
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
.count();
final Long total = mOutput.count();
assertEquals(total, recordTs);
final long recordTs = mOutput
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
.count();
assertEquals(total, recordNotEmpty);
final long recordNotEmpty = mOutput
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
.count();
}
assertEquals(total, recordTs);
@Test
public void tryLoadFolderOnCP() throws Exception {
final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
System.out.println("path = " + path);
Path tempDirWithPrefix = Files.createTempDirectory("mdstore_output");
System.out.println(tempDirWithPrefix.toFile().getAbsolutePath());
Files.deleteIfExists(tempDirWithPrefix);
assertEquals(total, recordNotEmpty);
}
}
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {

View File

@ -1,7 +1,7 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:vocabulary="http://eu/dnetlib/transform/extension"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
version="2.0"
exclude-result-prefixes="xsl vocabulary">

View File

@ -3,8 +3,8 @@
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oaf="http://namespace.openaire.eu/oaf"
xmlns:vocabulary="http://eu/dnetlib/trasform/extension"
xmlns:dateCleaner="http://eu/dnetlib/trasform/dates"
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
exclude-result-prefixes="xsl vocabulary dateCleaner">
<xsl:param name="varOfficialName"/>