forked from antonis.lempesis/dnet-hadoop
Cleanup of the transformation functions and the related tests
This commit is contained in:
parent
fc3fa5e343
commit
9c899f4433
|
@ -4,7 +4,10 @@ package eu.dnetlib.dhp.transformation.xslt;
|
|||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import net.sf.saxon.s9api.*;
|
||||
import scala.Serializable;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||
|
||||
public class Cleaner implements ExtensionFunction, Serializable {
|
||||
|
||||
|
@ -16,7 +19,7 @@ public class Cleaner implements ExtensionFunction, Serializable {
|
|||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName("http://eu/dnetlib/transform/extension", "clean");
|
||||
return new QName(QNAME_BASE_URI + "/clean", "clean");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.transformation.xslt;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
|
@ -8,7 +9,8 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sf.saxon.s9api.*;
|
||||
import scala.Serializable;
|
||||
|
||||
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||
|
||||
public class DateCleaner implements ExtensionFunction, Serializable {
|
||||
|
||||
|
@ -91,7 +93,7 @@ public class DateCleaner implements ExtensionFunction, Serializable {
|
|||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return new QName("http://eu/dnetlib/trasform/dates", "dateISO");
|
||||
return new QName(QNAME_BASE_URI + "/dateISO", "dateISO");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -15,6 +15,8 @@ import net.sf.saxon.s9api.*;
|
|||
|
||||
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
|
||||
|
||||
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
||||
|
||||
private final AggregationCounter aggregationCounter;
|
||||
|
||||
private final String transformationRule;
|
||||
|
|
|
@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
|
|||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -35,26 +34,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|||
@ExtendWith(MockitoExtension.class)
|
||||
public class TransformationJobTest extends AbstractVocabularyTest {
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException, ISLookUpException {
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(TransformationJobTest.class.getSimpleName());
|
||||
conf.setMaster("local");
|
||||
spark = SparkSession.builder().config(conf).getOrCreate();
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException, ISLookUpException {
|
||||
setUpVocabulary();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void afterAll() {
|
||||
spark.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Test Date cleaner")
|
||||
public void testDateCleaner() throws Exception {
|
||||
|
@ -82,68 +66,61 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
|||
// Print the record
|
||||
System.out.println(result.getBody());
|
||||
// TODO: add meaningful assertions on the transformed record
|
||||
|
||||
}
|
||||
|
||||
@DisplayName("Test TransformSparkJobNode.main")
|
||||
@Test
|
||||
@DisplayName("Test TransformSparkJobNode.main")
|
||||
public void transformTest(@TempDir Path testDir) throws Exception {
|
||||
|
||||
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
||||
final String mdstore_output = testDir.toString() + "/version";
|
||||
SparkConf conf = new SparkConf();
|
||||
conf.setAppName(TransformationJobTest.class.getSimpleName());
|
||||
conf.setMaster("local");
|
||||
|
||||
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
|
||||
try(SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) {
|
||||
|
||||
final Map<String, String> parameters = Stream.of(new String[][] {
|
||||
{
|
||||
"dateOfTransformation", "1234"
|
||||
},
|
||||
{
|
||||
"transformationPlugin", "XSLT_TRANSFORM"
|
||||
},
|
||||
{
|
||||
"transformationRuleId", "simpleTRule"
|
||||
},
|
||||
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
||||
final String mdstore_output = testDir.toString() + "/version";
|
||||
|
||||
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
||||
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
|
||||
|
||||
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
|
||||
final Map<String, String> parameters = Stream.of(new String[][]{
|
||||
{
|
||||
"dateOfTransformation", "1234"
|
||||
},
|
||||
{
|
||||
"transformationPlugin", "XSLT_TRANSFORM"
|
||||
},
|
||||
{
|
||||
"transformationRuleId", "simpleTRule"
|
||||
},
|
||||
|
||||
// TODO introduce useful assertions
|
||||
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
||||
|
||||
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
||||
final Dataset<MetadataRecord> mOutput = spark
|
||||
.read()
|
||||
.format("parquet")
|
||||
.load(mdstore_output + MDSTORE_DATA_PATH)
|
||||
.as(encoder);
|
||||
TransformSparkJobNode.transformRecords(parameters, isLookUpService, spark, mdstore_input, mdstore_output);
|
||||
|
||||
final Long total = mOutput.count();
|
||||
// TODO introduce useful assertions
|
||||
|
||||
final long recordTs = mOutput
|
||||
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
|
||||
.count();
|
||||
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
||||
final Dataset<MetadataRecord> mOutput = spark
|
||||
.read()
|
||||
.format("parquet")
|
||||
.load(mdstore_output + MDSTORE_DATA_PATH)
|
||||
.as(encoder);
|
||||
|
||||
final long recordNotEmpty = mOutput
|
||||
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
|
||||
.count();
|
||||
final Long total = mOutput.count();
|
||||
|
||||
assertEquals(total, recordTs);
|
||||
final long recordTs = mOutput
|
||||
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
|
||||
.count();
|
||||
|
||||
assertEquals(total, recordNotEmpty);
|
||||
final long recordNotEmpty = mOutput
|
||||
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
|
||||
.count();
|
||||
|
||||
}
|
||||
assertEquals(total, recordTs);
|
||||
|
||||
@Test
|
||||
public void tryLoadFolderOnCP() throws Exception {
|
||||
final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
||||
System.out.println("path = " + path);
|
||||
|
||||
Path tempDirWithPrefix = Files.createTempDirectory("mdstore_output");
|
||||
|
||||
System.out.println(tempDirWithPrefix.toFile().getAbsolutePath());
|
||||
|
||||
Files.deleteIfExists(tempDirWithPrefix);
|
||||
assertEquals(total, recordNotEmpty);
|
||||
}
|
||||
}
|
||||
|
||||
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:vocabulary="http://eu/dnetlib/transform/extension"
|
||||
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
version="2.0"
|
||||
exclude-result-prefixes="xsl vocabulary">
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||
xmlns:vocabulary="http://eu/dnetlib/trasform/extension"
|
||||
xmlns:dateCleaner="http://eu/dnetlib/trasform/dates"
|
||||
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||
exclude-result-prefixes="xsl vocabulary dateCleaner">
|
||||
<xsl:param name="varOfficialName"/>
|
||||
|
|
Loading…
Reference in New Issue