1
0
Fork 0

code formatting

This commit is contained in:
Claudio Atzori 2021-02-02 12:34:14 +01:00
parent 75807ea5ae
commit bb89b99b24
6 changed files with 164 additions and 167 deletions

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.aggregation.common; package eu.dnetlib.dhp.aggregation.common;
public class AggregationConstants { public class AggregationConstants {
@ -10,6 +11,4 @@ public class AggregationConstants {
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
} }

View File

@ -5,7 +5,6 @@ import java.io.BufferedOutputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@ -15,6 +14,8 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob; import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.MetadataRecord;

View File

@ -1,11 +1,16 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.collection;
import com.fasterxml.jackson.databind.ObjectMapper; import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.model.mdstore.Provenance; import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.IntWritable;
@ -22,18 +27,15 @@ import org.dom4j.Node;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.model.mdstore.Provenance;
import scala.Tuple2; import scala.Tuple2;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
public class GenerateNativeStoreSparkJob { public class GenerateNativeStoreSparkJob {
private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class); private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class);

View File

@ -1,19 +1,17 @@
package eu.dnetlib.dhp.transformation; package eu.dnetlib.dhp.transformation;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.saveDataset;
import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.writeTotalSizeOnHDFS;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.aggregation.common.AggregationConstants;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
@ -25,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.aggregation.common.AggregationUtility;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
@ -67,7 +64,6 @@ public class TransformSparkJobNode {
final String dateOfTransformation = parser.get("dateOfTransformation"); final String dateOfTransformation = parser.get("dateOfTransformation");
log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); log.info(String.format("dateOfTransformation: %s", dateOfTransformation));
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl); final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService); final VocabularyGroup vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService);
@ -94,7 +90,8 @@ public class TransformSparkJobNode {
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class); final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
saveDataset( saveDataset(
spark.read() spark
.read()
.format("parquet") .format("parquet")
.load(inputPath) .load(inputPath)
.as(encoder) .as(encoder)
@ -103,7 +100,6 @@ public class TransformSparkJobNode {
encoder), encoder),
outputPath + MDSTORE_DATA_PATH); outputPath + MDSTORE_DATA_PATH);
log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Transformed item " + ct.getProcessedItems().count());
log.info("Total item " + ct.getTotalItems().count()); log.info("Total item " + ct.getTotalItems().count());
log.info("Transformation Error item " + ct.getErrorItems().count()); log.info("Transformation Error item " + ct.getErrorItems().count());

View File

@ -12,11 +12,6 @@ import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.transformation.TransformSparkJobNode;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.IntWritable;
@ -35,7 +30,12 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord; import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.TransformSparkJobNode;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class AggregationJobTest { public class AggregationJobTest {
@ -57,7 +57,8 @@ public class AggregationJobTest {
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
provenance = IOUtils.toString(AggregationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/collection/provenance.json")); provenance = IOUtils
.toString(AggregationJobTest.class.getResourceAsStream("/eu/dnetlib/dhp/collection/provenance.json"));
workingDir = Files.createTempDirectory(AggregationJobTest.class.getSimpleName()); workingDir = Files.createTempDirectory(AggregationJobTest.class.getSimpleName());
log.info("using work dir {}", workingDir); log.info("using work dir {}", workingDir);
@ -100,7 +101,7 @@ public class AggregationJobTest {
GenerateNativeStoreSparkJob GenerateNativeStoreSparkJob
.main( .main(
new String[]{ new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-encoding", encoding, "-encoding", encoding,
"-dateOfCollection", dateOfCollection, "-dateOfCollection", dateOfCollection,
@ -130,7 +131,7 @@ public class AggregationJobTest {
GenerateNativeStoreSparkJob GenerateNativeStoreSparkJob
.main( .main(
new String[]{ new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-encoding", encoding, "-encoding", encoding,
"-dateOfCollection", dateOfCollection, "-dateOfCollection", dateOfCollection,
@ -144,7 +145,6 @@ public class AggregationJobTest {
verify(mdStoreV2); verify(mdStoreV2);
} }
@Test @Test
@Order(3) @Order(3)
public void testTransformSparkJob() throws Exception { public void testTransformSparkJob() throws Exception {
@ -152,14 +152,16 @@ public class AggregationJobTest {
MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json");
MDStoreVersion mdStoreCleanedVersion = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json"); MDStoreVersion mdStoreCleanedVersion = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreCleanedVersion.json");
TransformSparkJobNode.main(new String[]{ TransformSparkJobNode.main(new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-dateOfTransformation", dateOfCollection, "-dateOfTransformation", dateOfCollection,
"-mdstoreInputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2), "-mdstoreInputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreV2),
"-mdstoreOutputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreCleanedVersion), "-mdstoreOutputVersion", OBJECT_MAPPER.writeValueAsString(mdStoreCleanedVersion),
"-transformationPlugin", "XSLT_TRANSFORM", "-transformationPlugin", "XSLT_TRANSFORM",
"-isLookupUrl", "https://dev-openaire.d4science.org/is/services/isLookUp", "-isLookupUrl", "https://dev-openaire.d4science.org/is/services/isLookUp",
"-transformationRuleId", "183dde52-a69b-4db9-a07e-1ef2be105294_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="}); "-transformationRuleId",
"183dde52-a69b-4db9-a07e-1ef2be105294_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="
});
} }

View File

@ -75,9 +75,6 @@ public class TransformationJobTest {
spark.stop(); spark.stop();
} }
@Test @Test
@DisplayName("Test Transform Single XML using XSLTTransformator") @DisplayName("Test Transform Single XML using XSLTTransformator")
public void testTransformSaxonHE() throws Exception { public void testTransformSaxonHE() throws Exception {