diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 10ee5f9ff2..c4bb9e21f3 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -67,6 +67,11 @@ guava + + commons-codec + commons-codec + + diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index b5bca2e93e..b08e41a55d 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -3,11 +3,15 @@ package eu.dnetlib.dhp.schema.common; import static com.google.common.base.Preconditions.checkArgument; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.function.Function; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; import com.google.common.collect.Maps; @@ -473,4 +477,19 @@ public class ModelSupport { private static String idFnForOafEntity(T t) { return ((OafEntity) t).getId(); } + + public static String md5(final String s) { + try { + final MessageDigest md = MessageDigest.getInstance("MD5"); + md.update(s.getBytes(StandardCharsets.UTF_8)); + return new String(Hex.encodeHex(md.digest())); + } catch (final NoSuchAlgorithmException e) { + throw new IllegalStateException(e); + } + } + + public static String generateIdentifier(final String originalId, final String nsPrefix) { + return String.format("%s::%s", nsPrefix, md5(originalId)); + } + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java similarity index 87% rename from dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java index 0b59dcce07..9586680e3a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java @@ -1,12 +1,14 @@ -package eu.dnetlib.dhp.model.mdstore; +package eu.dnetlib.dhp.schema.mdstore; import java.io.Serializable; -import eu.dnetlib.dhp.utils.DHPUtils; +import eu.dnetlib.dhp.schema.common.ModelSupport; -/** This class models a record inside the new Metadata store collection on HDFS * */ -public class MetadataRecord implements Serializable { +/** + * This class models a record in a Metadata store collection on HDFS + */ + public class MetadataRecord implements Serializable { /** The D-Net Identifier associated to the record */ private String id; @@ -47,7 +49,7 @@ public class MetadataRecord implements Serializable { this.provenance = provenance; this.body = body; this.dateOfCollection = dateOfCollection; - this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix()); + this.id = ModelSupport.generateIdentifier(originalId, this.provenance.getNsPrefix()); } public String getId() { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java similarity index 96% rename from dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java index 556535022e..8af58f6284 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.model.mdstore; +package eu.dnetlib.dhp.schema.mdstore; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala index f04f92c636..168ad218a3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.actionmanager.datacite import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.model.mdstore.MetadataRecord +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.spark.SparkConf diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index ee82cc94fa..043da31f9a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -30,8 +30,8 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.model.mdstore.Provenance; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.Provenance; import scala.Tuple2; public class GenerateNativeStoreSparkJob { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index cc130c3769..6a09387085 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -25,10 +25,9 @@ import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.message.MessageSender; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import parquet.hadoop.ParquetReader; public class TransformSparkJobNode { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 45ba2981ff..096d0e2896 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index a813d84db1..d9b38e5729 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -10,7 +10,7 @@ import org.apache.spark.api.java.function.MapFunction; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index 723f030a6a..b8eb58ec26 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -38,8 +38,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; -import eu.dnetlib.dhp.model.mdstore.Provenance; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.Provenance; import eu.dnetlib.dhp.transformation.TransformSparkJobNode; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 3c0c8bf0fb..e29a8ac50e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -26,7 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; -import eu.dnetlib.dhp.model.mdstore.MetadataRecord; +import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.DateCleaner; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;