diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml
index 10ee5f9ff2..c4bb9e21f3 100644
--- a/dhp-schemas/pom.xml
+++ b/dhp-schemas/pom.xml
@@ -67,6 +67,11 @@
guava
+		<dependency>
+			<groupId>commons-codec</groupId>
+			<artifactId>commons-codec</artifactId>
+		</dependency>
+
diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
index b5bca2e93e..b08e41a55d 100644
--- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java
@@ -3,11 +3,15 @@ package eu.dnetlib.dhp.schema.common;
import static com.google.common.base.Preconditions.checkArgument;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
+import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Maps;
@@ -473,4 +477,36 @@ public class ModelSupport {
private static String idFnForOafEntity(T t) {
return ((OafEntity) t).getId();
}
+
+	/**
+	 * Computes the MD5 digest of the UTF-8 encoding of the given string.
+	 *
+	 * @param s the text to hash; passing {@code null} results in a {@link NullPointerException}
+	 * @return the digest, hex-encoded via {@link Hex#encodeHex(byte[])}
+	 * @throws IllegalStateException if no MD5 implementation is available in the running JVM
+	 */
+	public static String md5(final String s) {
+		final MessageDigest digest;
+		try {
+			digest = MessageDigest.getInstance("MD5");
+		} catch (final NoSuchAlgorithmException e) {
+			// rethrown unchecked, keeping the cause, so callers need no checked-exception handling
+			throw new IllegalStateException(e);
+		}
+		final byte[] hash = digest.digest(s.getBytes(StandardCharsets.UTF_8));
+		return new String(Hex.encodeHex(hash));
+	}
+
+	/**
+	 * Builds an identifier of the form {@code <nsPrefix>::<md5(originalId)>}.
+	 *
+	 * @param originalId the original identifier; only its MD5 hash ends up in the result
+	 * @param nsPrefix the namespace prefix, placed verbatim before the {@code ::} separator
+	 * @return the namespace prefix and the hashed original identifier joined by {@code ::}
+	 */
+	public static String generateIdentifier(final String originalId, final String nsPrefix) {
+		final String hashedId = md5(originalId);
+		return String.format("%s::%s", nsPrefix, hashedId);
+	}
+
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java
similarity index 87%
rename from dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java
index 0b59dcce07..9586680e3a 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/MetadataRecord.java
@@ -1,12 +1,14 @@
-package eu.dnetlib.dhp.model.mdstore;
+package eu.dnetlib.dhp.schema.mdstore;
import java.io.Serializable;
-import eu.dnetlib.dhp.utils.DHPUtils;
+import eu.dnetlib.dhp.schema.common.ModelSupport;
-/** This class models a record inside the new Metadata store collection on HDFS * */
-public class MetadataRecord implements Serializable {
+/**
+ * This class models a record in a Metadata store collection on HDFS
+ */
+ public class MetadataRecord implements Serializable {
/** The D-Net Identifier associated to the record */
private String id;
@@ -47,7 +49,7 @@ public class MetadataRecord implements Serializable {
this.provenance = provenance;
this.body = body;
this.dateOfCollection = dateOfCollection;
- this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
+ this.id = ModelSupport.generateIdentifier(originalId, this.provenance.getNsPrefix());
}
public String getId() {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java
similarity index 96%
rename from dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java
rename to dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java
index 556535022e..8af58f6284 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/Provenance.java
+++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/mdstore/Provenance.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.model.mdstore;
+package eu.dnetlib.dhp.schema.mdstore;
import java.io.Serializable;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
index f04f92c636..168ad218a3 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/GenerateDataciteDatasetSpark.scala
@@ -2,7 +2,7 @@ package eu.dnetlib.dhp.actionmanager.datacite
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord
import eu.dnetlib.dhp.schema.oaf.Oaf
import eu.dnetlib.dhp.utils.ISLookupClientFactory
import org.apache.spark.SparkConf
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
index ee82cc94fa..043da31f9a 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
@@ -30,8 +30,8 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.model.mdstore.Provenance;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.Provenance;
import scala.Tuple2;
public class GenerateNativeStoreSparkJob {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
index cc130c3769..6a09387085 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
@@ -25,10 +25,9 @@ import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.message.MessageSender;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-import parquet.hadoop.ParquetReader;
public class TransformSparkJobNode {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java
index 45ba2981ff..096d0e2896 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java
@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
index a813d84db1..d9b38e5729 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java
@@ -10,7 +10,7 @@ import org.apache.spark.api.java.function.MapFunction;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import net.sf.saxon.s9api.*;
public class XSLTTransformationFunction implements MapFunction {
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java
index 723f030a6a..b8eb58ec26 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java
@@ -38,8 +38,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
-import eu.dnetlib.dhp.model.mdstore.Provenance;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.Provenance;
import eu.dnetlib.dhp.transformation.TransformSparkJobNode;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
index 3c0c8bf0fb..e29a8ac50e 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
@@ -26,7 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
import eu.dnetlib.dhp.aggregation.AbstractVocabularyTest;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
-import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
+import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.xslt.DateCleaner;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;