From e76c4f62c1b1d6a35018f5fc27eb5377a9da94be Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 26 Feb 2021 10:58:48 +0100 Subject: [PATCH] MetadataRecord moved in dhp-schemas --- dhp-schemas/pom.xml | 5 +++++ .../dhp/schema/common/ModelSupport.java | 19 +++++++++++++++++++ .../dhp/schema}/mdstore/MetadataRecord.java | 12 +++++++----- .../dhp/schema}/mdstore/Provenance.java | 2 +- .../GenerateDataciteDatasetSpark.scala | 2 +- .../GenerateNativeStoreSparkJob.java | 4 ++-- .../transformation/TransformSparkJobNode.java | 3 +-- .../transformation/TransformationFactory.java | 2 +- .../xslt/XSLTTransformationFunction.java | 2 +- .../GenerateNativeStoreSparkJobTest.java | 4 ++-- .../transformation/TransformationJobTest.java | 2 +- 11 files changed, 41 insertions(+), 16 deletions(-) rename {dhp-common/src/main/java/eu/dnetlib/dhp/model => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema}/mdstore/MetadataRecord.java (87%) rename {dhp-common/src/main/java/eu/dnetlib/dhp/model => dhp-schemas/src/main/java/eu/dnetlib/dhp/schema}/mdstore/Provenance.java (96%) diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 10ee5f9ff..c4bb9e21f 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -67,6 +67,11 @@ guava + + commons-codec + commons-codec + + diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index b5bca2e93..b08e41a55 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -3,11 +3,15 @@ package eu.dnetlib.dhp.schema.common; import static com.google.common.base.Preconditions.checkArgument; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.function.Function; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; import com.google.common.collect.Maps; @@ -473,4 +477,19 @@ public class ModelSupport { private static String idFnForOafEntity(T t) { return ((OafEntity) t).getId(); } + + public static String md5(final String s) { + try { + final MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(s.getBytes(StandardCharsets.UTF_8)); + return new String(Hex.encodeHex(md.digest())); + } catch (final NoSuchAlgorithmException e) { + throw new IllegalStateException(e); + } + } + + public static String generateIdentifier(final String originalId, final String nsPrefix) { + return String.format("%s::%s", nsPrefix, md5(originalId)); + } + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java similarity index 87% rename from dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java index 0b59dcce0..9586680e3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java @@ -1,12 +1,14 @@ -package eu.dnetlib.dhp.model.mdstore; +package eu.dnetlib.dhp.schema.mdstore; import java.io.Serializable; -import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.dhp.schema.common.ModelSupport; -/** This class models a record inside the new Metadata store collection on HDFS * */ -public class MetadataRecord implements Serializable { +/** + * This class models a record in a Metadata store collection on HDFS + */ + public class MetadataRecord implements Serializable { /** The D-Net Identifier associated to the record */ private String id; @@ -47,7 +49,7 @@ public class MetadataRecord implements Serializable { this.provenance = provenance; this.body = body; this.dateOfCollection = dateOfCollection; - this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix()); + this.id = ModelSupport.generateIdentifier(originalId, this.provenance.getNsPrefix()); } public String getId() { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java similarity index 96% rename from dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java index 556535022..8af58f628 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.model.mdstore; +package eu.dnetlib.dhp.schema.mdstore; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala index f04f92c63..168ad218a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.actionmanager.datacite import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.model.mdstore.MetadataRecord +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.spark.SparkConf diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index ee82cc94f..043da31f9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -30,8 +30,8 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.model.mdstore.Provenance; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.Provenance; import scala.Tuple2; public class GenerateNativeStoreSparkJob { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index cc130c376..6a0938708 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -25,10 +25,9 @@ import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.message.MessageSender; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import parquet.hadoop.ParquetReader; public class TransformSparkJobNode { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 45ba2981f..096d0e289 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index a813d84db..d9b38e572 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -10,7 +10,7 @@ import org.apache.spark.api.java.function.MapFunction; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index 723f030a6..b8eb58ec2 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -38,8 +38,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.model.mdstore.Provenance; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.Provenance; import eu.dnetlib.dhp.transformation.TransformSparkJobNode; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 3c0c8bf0f..e29a8ac50 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -26,7 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.DateCleaner; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;