forked from D-Net/dnet-hadoop
code formatting
This commit is contained in:
parent
9c899f4433
commit
271e88537b
|
@ -1,14 +1,14 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
|
||||||
|
|
||||||
public class Cleaner implements ExtensionFunction, Serializable {
|
public class Cleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
private final VocabularyGroup vocabularies;
|
private final VocabularyGroup vocabularies;
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
@ -10,8 +12,6 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
|
||||||
|
|
||||||
public class DateCleaner implements ExtensionFunction, Serializable {
|
public class DateCleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
private final static List<Pattern> dateRegex = Arrays
|
private final static List<Pattern> dateRegex = Arrays
|
||||||
|
|
|
@ -76,23 +76,26 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
conf.setAppName(TransformationJobTest.class.getSimpleName());
|
conf.setAppName(TransformationJobTest.class.getSimpleName());
|
||||||
conf.setMaster("local");
|
conf.setMaster("local");
|
||||||
|
|
||||||
try(SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) {
|
try (SparkSession spark = SparkSession.builder().config(conf).getOrCreate()) {
|
||||||
|
|
||||||
final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
|
final String mdstore_input = this
|
||||||
|
.getClass()
|
||||||
|
.getResource("/eu/dnetlib/dhp/transform/mdstorenative")
|
||||||
|
.getFile();
|
||||||
final String mdstore_output = testDir.toString() + "/version";
|
final String mdstore_output = testDir.toString() + "/version";
|
||||||
|
|
||||||
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
|
mockupTrasformationRule("simpleTRule", "/eu/dnetlib/dhp/transform/ext_simple.xsl");
|
||||||
|
|
||||||
final Map<String, String> parameters = Stream.of(new String[][]{
|
final Map<String, String> parameters = Stream.of(new String[][] {
|
||||||
{
|
{
|
||||||
"dateOfTransformation", "1234"
|
"dateOfTransformation", "1234"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"transformationPlugin", "XSLT_TRANSFORM"
|
"transformationPlugin", "XSLT_TRANSFORM"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"transformationRuleId", "simpleTRule"
|
"transformationRuleId", "simpleTRule"
|
||||||
},
|
},
|
||||||
|
|
||||||
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
}).collect(Collectors.toMap(data -> data[0], data -> data[1]));
|
||||||
|
|
||||||
|
@ -102,20 +105,20 @@ public class TransformationJobTest extends AbstractVocabularyTest {
|
||||||
|
|
||||||
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
||||||
final Dataset<MetadataRecord> mOutput = spark
|
final Dataset<MetadataRecord> mOutput = spark
|
||||||
.read()
|
.read()
|
||||||
.format("parquet")
|
.format("parquet")
|
||||||
.load(mdstore_output + MDSTORE_DATA_PATH)
|
.load(mdstore_output + MDSTORE_DATA_PATH)
|
||||||
.as(encoder);
|
.as(encoder);
|
||||||
|
|
||||||
final Long total = mOutput.count();
|
final Long total = mOutput.count();
|
||||||
|
|
||||||
final long recordTs = mOutput
|
final long recordTs = mOutput
|
||||||
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
|
.filter((FilterFunction<MetadataRecord>) p -> p.getDateOfTransformation() == 1234)
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
final long recordNotEmpty = mOutput
|
final long recordNotEmpty = mOutput
|
||||||
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
|
.filter((FilterFunction<MetadataRecord>) p -> !StringUtils.isBlank(p.getBody()))
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
assertEquals(total, recordTs);
|
assertEquals(total, recordTs);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue