forked from D-Net/dnet-hadoop
cleanup of the transformation functions and the related tests
This commit is contained in:
parent
fc3fa5e343
commit
9c899f4433
|
@ -4,7 +4,10 @@ package eu.dnetlib.dhp.transformation.xslt;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
import scala.Serializable;
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||||
|
|
||||||
public class Cleaner implements ExtensionFunction, Serializable {
|
public class Cleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
|
@ -16,7 +19,7 @@ public class Cleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QName getName() {
|
public QName getName() {
|
||||||
return new QName("http://eu/dnetlib/transform/extension", "clean");
|
return new QName(QNAME_BASE_URI + "/clean", "clean");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
@ -8,7 +9,8 @@ import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
import scala.Serializable;
|
|
||||||
|
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||||
|
|
||||||
public class DateCleaner implements ExtensionFunction, Serializable {
|
public class DateCleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
|
@ -91,7 +93,7 @@ public class DateCleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QName getName() {
|
public QName getName() {
|
||||||
return new QName("http://eu/dnetlib/trasform/dates", "dateISO");
|
return new QName(QNAME_BASE_URI + "/dateISO", "dateISO");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -15,6 +15,8 @@ import net.sf.saxon.s9api.*;
|
||||||
|
|
||||||
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
|
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord> {
|
||||||
|
|
||||||
|
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
||||||
|
|
||||||
private final AggregationCounter aggregationCounter;
|
private final AggregationCounter aggregationCounter;
|
||||||
|
|
||||||
private final String transformationRule;
|
private final String transformationRule;
|
||||||
|
|
|
@ -5,7 +5,6 @@ import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -35,26 +34,11 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class TransformationJobTest extends AbstractVocabularyTest {
|
public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
private static SparkSession spark;
|
|
||||||
|
|
||||||
@BeforeAll
|
|
||||||
public static void beforeAll() throws IOException, ISLookUpException {
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
conf.setAppName(TransformationJobTest.class.getSimpleName());
|
|
||||||
conf.setMaster("local");
|
|
||||||
spark = SparkSession.builder().config(conf).getOrCreate();
|
|
||||||
}
|
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setUp() throws IOException, ISLookUpException {
|
public void setUp() throws IOException, ISLookUpException {
|
||||||
setUpVocabulary();
|
setUpVocabulary();
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterAll
|
|
||||||
public static void afterAll() {
|
|
||||||
spark.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@DisplayName("Test Date cleaner")
|
@DisplayName("Test Date cleaner")
|
||||||
public void testDateCleaner() throws Exception {
|
public void testDateCleaner() throws Exception {
|
||||||
|
@ -82,19 +66,24 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
// Print the record
|
// Print the record
|
||||||
System.out.println(result.getBody());
|
System.out.println(result.getBody());
|
||||||
// TODO Create significant Assert
|
// TODO Create significant Assert
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@DisplayName("Test TransformSparkJobNode.main")
|
|
||||||
@Test
|
@Test
|
||||||
|
@DisplayName("Test TransformSparkJobNode.main")
|
||||||
public void transformTest(@TempDir Path testDir) throws Exception {
|
public void transformTest(@TempDir Path testDir) throws Exception {
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
conf.setAppName(TransformationJobTest.class.getSimpleName());
|
||||||
|
conf.setMaster("local");
|
||||||
|
|
||||||
|
try(SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) {
|
||||||
|
|
||||||
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
||||||
final String mdstore_output = testDir.toString() + "/version";
|
final String mdstore_output = testDir.toString() + "/version";
|
||||||
|
|
||||||
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
|
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
|
||||||
|
|
||||||
final Map<String, String> parameters = Stream.of(new String[][] {
|
final Map<String, String> parameters = Stream.of(new String[][]{
|
||||||
{
|
{
|
||||||
"dateOfTransformation", "1234"
|
"dateOfTransformation", "1234"
|
||||||
},
|
},
|
||||||
|
@ -131,19 +120,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
assertEquals(total, recordTs);
|
assertEquals(total, recordTs);
|
||||||
|
|
||||||
assertEquals(total, recordNotEmpty);
|
assertEquals(total, recordNotEmpty);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void tryLoadFolderOnCP() throws Exception {
|
|
||||||
final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
|
||||||
System.out.println("path = " + path);
|
|
||||||
|
|
||||||
Path tempDirWithPrefix = Files.createTempDirectory("mdstore_output");
|
|
||||||
|
|
||||||
System.out.println(tempDirWithPrefix.toFile().getAbsolutePath());
|
|
||||||
|
|
||||||
Files.deleteIfExists(tempDirWithPrefix);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
|
private XSLTTransformationFunction loadTransformationRule(final String path) throws Exception {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||||
xmlns:vocabulary="http://eu/dnetlib/transform/extension"
|
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||||
version="2.0"
|
version="2.0"
|
||||||
exclude-result-prefixes="xsl vocabulary">
|
exclude-result-prefixes="xsl vocabulary">
|
||||||
|
|
|
@ -3,8 +3,8 @@
|
||||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||||
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
|
||||||
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
xmlns:oaf="http://namespace.openaire.eu/oaf"
|
||||||
xmlns:vocabulary="http://eu/dnetlib/trasform/extension"
|
xmlns:vocabulary="http://eu/dnetlib/transform/clean"
|
||||||
xmlns:dateCleaner="http://eu/dnetlib/trasform/dates"
|
xmlns:dateCleaner="http://eu/dnetlib/transform/dateISO"
|
||||||
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
xmlns:dr="http://www.driver-repository.eu/namespace/dr"
|
||||||
exclude-result-prefixes="xsl vocabulary dateCleaner">
|
exclude-result-prefixes="xsl vocabulary dateCleaner">
|
||||||
<xsl:param name="varOfficialName"/>
|
<xsl:param name="varOfficialName"/>
|
||||||
|
|
Loading…
Reference in New Issue