diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 1dc3208b5a..6e7ee527b7 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -29,6 +29,12 @@
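             <artifactId>spark-sql_2.11</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>eu.dnetlib.dhp</groupId>
+            <artifactId>dhp-schemas</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
         <dependency>
             <groupId>commons-cli</groupId>
             <artifactId>commons-cli</artifactId>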
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
new file mode 100644
index 0000000000..02a9461543
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
@@ -0,0 +1,90 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import eu.dnetlib.dhp.utils.DHPUtils;
+import org.apache.commons.lang.StringUtils;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Serializable;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Factory class for OpenAIRE identifiers in the Graph. An identifier is derived from the most
+ * relevant accepted PID of an entity; entities without a usable PID keep their current id.
+ */
+public class IdentifierFactory implements Serializable {
+
+    private static final Logger log = LoggerFactory.getLogger(IdentifierFactory.class);
+
+    public static final String ID_SEPARATOR = "::";
+    public static final String ID_PREFIX_SEPARATOR = "|";
+    /** Syntax of a valid id: two digits, '|', any 12 characters, '::', 32 alphanumerics. */
+    public static final String ID_REGEX = "^[0-9][0-9]\\"+ID_PREFIX_SEPARATOR+".{12}"+ID_SEPARATOR+"[a-zA-Z0-9]{32}$";
+    public static final int ID_PREFIX_LEN = 12;
+
+    /** PID types (qualifier classid) that can be turned into an identifier. */
+    public static Set<String> acceptedPidTypes = new HashSet<>();
+
+    static {
+        // NOTE(review): PidComparator also ranks pmid, pmc, GRID, mag_id and urn, which are not
+        // accepted here and therefore never reach it - confirm whether they should be added.
+        acceptedPidTypes.add("doi");
+    }
+
+    /**
+     * Creates the identifier of an entity from its most relevant accepted PID.
+     *
+     * @param entity the entity to identify
+     * @return the PID based id, or {@code entity.getId()} when the entity has no usable PID
+     * @throws RuntimeException when the PID based id does not match {@link #ID_REGEX}
+     */
+    public static <T extends OafEntity> String createIdentifier(T entity) {
+        if (Objects.isNull(entity.getPid()) || entity.getPid().isEmpty()) {
+            return entity.getId();
+        }
+        return entity.getPid().stream()
+            // skip PIDs that cannot be typed, or whose empty value would hash to a shared id
+            .filter(s -> Objects.nonNull(s) && Objects.nonNull(s.getQualifier()))
+            .filter(s -> StringUtils.isNotBlank(s.getValue()))
+            .filter(s -> acceptedPidTypes.contains(s.getQualifier().getClassid()))
+            // PidComparator orders the preferred PID type first, so min() selects it
+            .min(new PidComparator<>(entity))
+            .map(s -> idFromPid(entity, s))
+            .map(IdentifierFactory::verifyIdSyntax)
+            .orElseGet(entity::getId);
+    }
+
+    /** Returns {@code s} when it matches {@link #ID_REGEX}, throws RuntimeException otherwise. */
+    protected static String verifyIdSyntax(String s) {
+        if (StringUtils.isBlank(s) || !s.matches(ID_REGEX)) {
+            throw new RuntimeException(String.format("malformed id: '%s'", s));
+        }
+        return s;
+    }
+
+    /** Builds the id: entity id prefix, '|', padded PID type, '::', md5 of the cleaned value. */
+    private static <T extends OafEntity> String idFromPid(T entity, StructuredProperty s) {
+        return StringUtils.substringBefore(entity.getId(), ID_PREFIX_SEPARATOR)
+            + ID_PREFIX_SEPARATOR
+            + createPrefix(s.getQualifier().getClassid())
+            + ID_SEPARATOR
+            + DHPUtils.md5(normalizePidValue(s.getValue()));
+    }
+
+    /** Lower-cases the PID value independently of the JVM locale and removes its spaces. */
+    private static String normalizePidValue(String value) {
+        // TODO more aggressive cleaning? keep only alphanum and punctuation?
+        return value.toLowerCase(java.util.Locale.ROOT).replaceAll(" ", "");
+    }
+
+    /** Creates the prefix: the PID type cut or right-padded with '_' to ID_PREFIX_LEN chars. */
+    private static String createPrefix(String pidType) {
+        return StringUtils.rightPad(StringUtils.left(pidType, ID_PREFIX_LEN), ID_PREFIX_LEN, '_');
+    }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java
new file mode 100644
index 0000000000..97bdd9c773
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/PidComparator.java
@@ -0,0 +1,84 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Organization;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+
+/**
+ * Orders the PIDs of an entity by relevance: the preferred PID type compares as smaller, so it
+ * sorts first and is the one selected by {@code min}. Null PIDs are ordered last.
+ *
+ * @param <T> the type of the entity owning the compared PIDs
+ */
+public class PidComparator<T extends OafEntity> implements Comparator<StructuredProperty> {
+
+    /** Result PID types, most relevant first. */
+    private static final String[] RESULT_PID_TYPES = { "doi", "pmid", "pmc" };
+
+    /** Organization PID types, most relevant first. */
+    private static final String[] ORGANIZATION_PID_TYPES = { "GRID", "mag_id", "urn" };
+
+    private final T entity;
+
+    /** Creates a comparator that picks its PID ranking from the kind of the given entity. */
+    public PidComparator(T entity) {
+        this.entity = entity;
+    }
+
+    /**
+     * Compares two PIDs by the relevance of their type (qualifier classid).
+     *
+     * @param left  first PID, may be null; otherwise its qualifier and classid must be set
+     * @param right second PID, may be null; otherwise its qualifier and classid must be set
+     * @return a negative value when {@code left} is more relevant, a positive value when
+     *         {@code right} is, 0 when both are equally relevant
+     */
+    @Override
+    public int compare(StructuredProperty left, StructuredProperty right) {
+        if (left == null || right == null) {
+            // nulls go last: 0 when both are null, 1 when only left is, -1 when only right is
+            return Boolean.compare(left == null, right == null);
+        }
+
+        String lClass = left.getQualifier().getClassid();
+        String rClass = right.getQualifier().getClassid();
+        if (lClass.equals(rClass)) {
+            return 0;
+        }
+
+        if (ModelSupport.isSubClass(entity, Result.class)) {
+            return compareResultPids(lClass, rClass);
+        }
+        if (ModelSupport.isSubClass(entity, Organization.class)) {
+            return compareOrganizationPids(lClass, rClass);
+        }
+
+        // Else (but unlikely), lexicographical ordering will do.
+        return lClass.compareTo(rClass);
+    }
+
+    /** Ranks result PID types: doi, then pmid, then pmc; types outside that list tie. */
+    private int compareResultPids(String lClass, String rClass) {
+        return Integer.compare(rank(lClass, RESULT_PID_TYPES), rank(rClass, RESULT_PID_TYPES));
+    }
+
+    /** Ranks organization PID types: GRID, then mag_id, then urn; types outside that list tie. */
+    private int compareOrganizationPids(String lClass, String rClass) {
+        return Integer.compare(rank(lClass, ORGANIZATION_PID_TYPES), rank(rClass, ORGANIZATION_PID_TYPES));
+    }
+
+    /** Returns the position of pidType in the ranking, or ranking.length when it is not listed. */
+    private static int rank(String pidType, String[] ranking) {
+        for (int i = 0; i < ranking.length; i++) {
+            if (ranking[i].equals(pidType)) {
+                return i;
+            }
+        }
+        return ranking.length;
+    }
+}