diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml
index 44165995d..688b54f1d 100644
--- a/dhp-build/dhp-build-assembly-resources/pom.xml
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.2.5-SNAPSHOT
+ 2.0.0-SNAPSHOT
dhp-build-assembly-resources
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
index 7579bdf45..ebb917437 100644
--- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@@ -6,7 +6,7 @@
eu.dnetlib.dhp
dhp-build
- 1.2.5-SNAPSHOT
+ 2.0.0-SNAPSHOT
dhp-build-properties-maven-plugin
diff --git a/dhp-build/dhp-code-style/pom.xml b/dhp-build/dhp-code-style/pom.xml
index 5a86efe17..dfc3c75d9 100644
--- a/dhp-build/dhp-code-style/pom.xml
+++ b/dhp-build/dhp-code-style/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp-code-style
- 1.2.5-SNAPSHOT
+ 2.0.0-SNAPSHOT
jar
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
index 9040ea94e..21a277f8f 100644
--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@@ -4,7 +4,7 @@
eu.dnetlib.dhp
dhp
- 1.2.5-SNAPSHOT
+ 2.0.0-SNAPSHOT
dhp-build
pom
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 6df11f4ea..377db5681 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -5,7 +5,7 @@
eu.dnetlib.dhp
dhp
- 1.2.5-SNAPSHOT
+ 2.0.0-SNAPSHOT
../pom.xml
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java
index 5d39216f1..2d292a139 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/action/ReadDatasourceMasterDuplicateFromDB.java
@@ -1,6 +1,8 @@
package eu.dnetlib.dhp.common.action;
+import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
+
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
@@ -19,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadDatasourceMasterDuplicateFromDB {
@@ -59,8 +60,8 @@ public class ReadDatasourceMasterDuplicateFromDB {
final String masterId = rs.getString("masterId");
final String masterName = rs.getString("masterName");
- md.setDuplicateId(OafMapperUtils.createOpenaireId(10, duplicateId, true));
- md.setMasterId(OafMapperUtils.createOpenaireId(10, masterId, true));
+ md.setDuplicateId(createOpenaireId(10, duplicateId, true));
+ md.setMasterId(createOpenaireId(10, masterId, true));
md.setMasterName(masterName);
return md;
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
index 2aeccfcf2..544da78f5 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java
@@ -9,13 +9,13 @@ import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType;
+import org.jetbrains.annotations.NotNull;
import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.*;
-import org.jetbrains.annotations.NotNull;
public class ZenodoAPIClient implements Serializable {
@@ -80,7 +80,7 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode();
conn.disconnect();
- if(!checkOKStatus(responseCode))
+ if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
@@ -115,7 +115,7 @@ public class ZenodoAPIClient implements Serializable {
}
int responseCode = conn.getResponseCode();
- if(! checkOKStatus(responseCode)){
+ if (!checkOKStatus(responseCode)) {
throw new IOException("Unexpected code " + responseCode + getBody(conn));
}
@@ -126,7 +126,7 @@ public class ZenodoAPIClient implements Serializable {
private String getBody(HttpURLConnection conn) throws IOException {
String body = "{}";
try (BufferedReader br = new BufferedReader(
- new InputStreamReader(conn.getInputStream(), "utf-8"))) {
+ new InputStreamReader(conn.getInputStream(), "utf-8"))) {
StringBuilder response = new StringBuilder();
String responseLine = null;
while ((responseLine = br.readLine()) != null) {
@@ -155,7 +155,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true);
conn.setRequestMethod("PUT");
-
try (OutputStream os = conn.getOutputStream()) {
byte[] input = metadata.getBytes("utf-8");
os.write(input, 0, input.length);
@@ -164,19 +163,18 @@ public class ZenodoAPIClient implements Serializable {
final int responseCode = conn.getResponseCode();
conn.disconnect();
- if(!checkOKStatus(responseCode))
+ if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + getBody(conn));
return responseCode;
-
}
- private boolean checkOKStatus(int responseCode) {
+ private boolean checkOKStatus(int responseCode) {
- if(HttpURLConnection.HTTP_OK != responseCode ||
- HttpURLConnection.HTTP_CREATED != responseCode)
- return true ;
+ if (HttpURLConnection.HTTP_OK == responseCode ||
+ HttpURLConnection.HTTP_CREATED == responseCode)
+ return true;
return false;
}
@@ -233,7 +231,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true);
conn.setRequestMethod("POST");
-
try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length);
@@ -245,7 +242,7 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode();
conn.disconnect();
- if(!checkOKStatus(responseCode))
+ if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
@@ -290,13 +287,12 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode();
conn.disconnect();
- if(!checkOKStatus(responseCode))
+ if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket();
-
return responseCode;
}
@@ -331,22 +327,16 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true);
conn.setRequestMethod("GET");
-
-
String body = getBody(conn);
int responseCode = conn.getResponseCode();
conn.disconnect();
- if(!checkOKStatus(responseCode))
+ if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
-
-
return body;
-
-
}
private String getBucket(String inputUurl) throws IOException {
@@ -363,15 +353,13 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode();
conn.disconnect();
- if(!checkOKStatus(responseCode))
+ if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
return zenodoModel.getLinks().getBucket();
-
-
}
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
index 2ab23bda6..879a09481 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java
@@ -73,11 +73,11 @@ public class Vocabulary implements Serializable {
public Qualifier getTermAsQualifier(final String termId, boolean strict) {
final VocabularyTerm term = getTerm(termId);
if (Objects.nonNull(term)) {
- return OafMapperUtils.qualifier(term.getId(), term.getName(), getId(), getName());
+ return OafMapperUtils.qualifier(term.getId(), term.getName(), getId());
} else if (Objects.isNull(term) && strict) {
- return OafMapperUtils.unknown(getId(), getName());
+ return OafMapperUtils.unknown(getId());
} else {
- return OafMapperUtils.qualifier(termId, termId, getId(), getName());
+ return OafMapperUtils.qualifier(termId, termId, getId());
}
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
index fc7175270..ccd2a7d1b 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java
@@ -125,12 +125,12 @@ public class VocabularyGroup implements Serializable {
if (vocabularyExists(vocId)) {
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
}
- return OafMapperUtils.qualifier(id, id, "", "");
+ return OafMapperUtils.qualifier(id, id, "");
}
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
- return OafMapperUtils.unknown("", "");
+ return OafMapperUtils.unknown("");
}
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
}
@@ -142,7 +142,7 @@ public class VocabularyGroup implements Serializable {
*/
public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
- return OafMapperUtils.unknown("", "");
+ return OafMapperUtils.unknown("");
}
return vocs.get(vocId).getSynonymAsQualifier(syn);
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
index aea046203..ff6fcde94 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java
@@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author;
+import eu.dnetlib.dhp.schema.oaf.AuthorPid;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person;
import scala.Tuple2;
@@ -75,7 +76,7 @@ public class AuthorMerger {
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// (list of pid that are missing in the other list)
- final List> pidToEnrich = enrich
+ final List> pidToEnrich = enrich
.stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap(
@@ -111,7 +112,7 @@ public class AuthorMerger {
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add
- final List tmp = new ArrayList<>(r.getPid());
+ final List tmp = new ArrayList<>(r.getPid());
tmp.add(a._1());
r.setPid(tmp);
}
@@ -120,10 +121,12 @@ public class AuthorMerger {
}
public static String pidToComparableString(StructuredProperty pid) {
- final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
- : "";
- return (pid.getQualifier() != null ? classid : "")
- + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
+ return pid.toComparableString();
+ /*
+ * final String classid = pid.getQualifier().getClassid() != null ?
+ * pid.getQualifier().getClassid().toLowerCase() : ""; return (pid.getQualifier() != null ? classid : "") +
+ * (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
+ */
}
public static int countAuthorsPids(List authors) {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
index 3f65d754f..b74f895ff 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/DispatchEntitiesSparkJob.java
@@ -21,8 +21,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class DispatchEntitiesSparkJob {
@@ -58,7 +58,7 @@ public class DispatchEntitiesSparkJob {
log.info("graphTableClassName: {}", graphTableClassName);
@SuppressWarnings("unchecked")
- Class extends OafEntity> entityClazz = (Class extends OafEntity>) Class.forName(graphTableClassName);
+ Class extends Entity> entityClazz = (Class extends Entity>) Class.forName(graphTableClassName);
SparkConf conf = new SparkConf();
runWithSparkSession(
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
index e652bd5b6..491e98874 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/GroupEntitiesSparkJob.java
@@ -30,9 +30,9 @@ import com.jayway.jsonpath.Option;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2;
/**
@@ -87,17 +87,17 @@ public class GroupEntitiesSparkJob {
String inputPath,
String outputPath) {
- final TypedColumn aggregator = new GroupingAggregator().toColumn();
+ final TypedColumn aggregator = new GroupingAggregator().toColumn();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
spark
.read()
.textFile(toSeq(listEntityPaths(inputPath, sc)))
- .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class))
- .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e)))
- .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
+ .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(Entity.class))
+ .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e)))
+ .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
.agg(aggregator)
.map(
- (MapFunction, String>) t -> t._2().getClass().getName() +
+ (MapFunction, String>) t -> t._2().getClass().getName() +
"|" + OBJECT_MAPPER.writeValueAsString(t._2()),
Encoders.STRING())
.write()
@@ -106,48 +106,48 @@ public class GroupEntitiesSparkJob {
.text(outputPath);
}
- public static class GroupingAggregator extends Aggregator {
+ public static class GroupingAggregator extends Aggregator {
@Override
- public OafEntity zero() {
+ public Entity zero() {
return null;
}
@Override
- public OafEntity reduce(OafEntity b, OafEntity a) {
+ public Entity reduce(Entity b, Entity a) {
return mergeAndGet(b, a);
}
- private OafEntity mergeAndGet(OafEntity b, OafEntity a) {
+ private Entity mergeAndGet(Entity b, Entity a) {
if (Objects.nonNull(a) && Objects.nonNull(b)) {
- return OafMapperUtils.mergeEntities(b, a);
+ return MergeUtils.merge(b, a, true);
}
return Objects.isNull(a) ? b : a;
}
@Override
- public OafEntity merge(OafEntity b, OafEntity a) {
+ public Entity merge(Entity b, Entity a) {
return mergeAndGet(b, a);
}
@Override
- public OafEntity finish(OafEntity j) {
+ public Entity finish(Entity j) {
return j;
}
@Override
- public Encoder bufferEncoder() {
- return Encoders.kryo(OafEntity.class);
+ public Encoder bufferEncoder() {
+ return Encoders.kryo(Entity.class);
}
@Override
- public Encoder outputEncoder() {
- return Encoders.kryo(OafEntity.class);
+ public Encoder outputEncoder() {
+ return Encoders.kryo(Entity.class);
}
}
- private static OafEntity parseOaf(String s) {
+ private static Entity parseOaf(String s) {
DocumentContext dc = JsonPath
.parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS));
@@ -184,7 +184,7 @@ public class GroupEntitiesSparkJob {
}
}
- private static OafEntity parse(String s, Class clazz) {
+ private static Entity parse(String s, Class clazz) {
try {
return OBJECT_MAPPER.readValue(s, clazz);
} catch (IOException e) {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
new file mode 100644
index 000000000..9ef2a23bd
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/common/ModelConstants.java
@@ -0,0 +1,172 @@
+
+package eu.dnetlib.dhp.schema.common;
+
+import eu.dnetlib.dhp.schema.oaf.AccessRight;
+import eu.dnetlib.dhp.schema.oaf.KeyValue;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+
+public class ModelConstants {
+
+ private ModelConstants() {
+ }
+
+ public static final String DOI = "doi";
+ public static final String ORCID = "orcid";
+ public static final String ORCID_PENDING = "orcid_pending";
+ public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
+ public static final String ORCID_DS = ORCID.toUpperCase();
+
+ public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";
+
+ public static final String CROSSREF_NAME = "Crossref";
+ public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";
+
+ public static final String ZENODO_OD_ID = "10|opendoar____::358aee4cc897452c00244351e4d91f69";
+ public static final String ZENODO_R3_ID = "10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6";
+
+ public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c";
+ public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357";
+ public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23";
+ public static final String ROHUB_ID = "10|fairsharing_::1b69ebedb522700034547abc5652ffac";
+
+ public static final String OPENORGS_NAME = "OpenOrgs Database";
+
+ public static final String OPENOCITATIONS_NAME = "OpenCitations";
+ public static final String OPENOCITATIONS_ID = "10|openaire____::c06df618c5de1c786535ccf3f8b7b059";
+
+ public static final String OPEN_APC_NAME = "OpenAPC Global Initiative";
+ public static final String OPEN_APC_ID = "10|apc_________::e2b1600b229fc30663c8a1f662debddf";
+
+ // VOCABULARY VALUE
+ public static final String ACCESS_RIGHT_OPEN = "OPEN";
+ public static final String ACCESS_RIGHT_EMBARGO = "EMBARGO";
+ public static final String ACCESS_RIGHT_CLOSED = "CLOSED";
+
+ public static final String DNET_SUBJECT_KEYWORD = "keyword";
+
+ public static final String DNET_SUBJECT_FOS_CLASSID = "FOS";
+
+ public static final String DNET_SUBJECT_FOS_CLASSNAME = "Fields of Science and Technology classification";
+
+ public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
+ public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
+ public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
+ public static final String DNET_ACCESS_MODES = "dnet:access_modes";
+ public static final String DNET_LANGUAGES = "dnet:languages";
+ public static final String DNET_PID_TYPES = "dnet:pid_types";
+ public static final String DNET_DATACITE_DATE = "dnet:dataCite_date";
+ public static final String DNET_DATACITE_TITLE = "dnet:dataCite_title";
+ public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
+ public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
+ public static final String DNET_COUNTRY_TYPE = "dnet:countries";
+ public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
+ public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages";
+ public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies";
+ public static final String DNET_RELATION_RELTYPE = "dnet:relation_relType";
+ public static final String DNET_RELATION_SUBRELTYPE = "dnet:relation_subRelType";
+ public static final String DNET_RELATION_RELCLASS = "dnet:relation_relClass";
+
+ public static final String PEER_REVIEWED_CLASSNAME = "peerReviewed";
+ public static final String NON_PEER_REVIEWED_CLASSNAME = "nonPeerReviewed";
+ public static final String PEER_REVIEWED_CLASSID = "0001";
+ public static final String NON_PEER_REVIEWED_CLASSID = "0002";
+
+ public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
+ public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
+ public static final String SYSIMPORT_ACTIONSET = "sysimport:actionset";
+ public static final String SYSIMPORT_ORCID_NO_DOI = "sysimport:actionset:orcidworks-no-doi";
+
+ public static final String USER_CLAIM = "user:claim";
+ public static final String HARVESTED = "Harvested";
+
+ public static final String PROVENANCE_DEDUP = "sysimport:dedup";
+ public static final String PROVENANCE_ENRICH = "sysimport:enrich";
+
+ public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier(
+ SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS);
+
+ public static final String UNKNOWN = "UNKNOWN";
+ public static final String NOT_AVAILABLE = "not available";
+
+ public static final Qualifier REPOSITORY_PROVENANCE_ACTIONS = qualifier(
+ SYSIMPORT_CROSSWALK_REPOSITORY, SYSIMPORT_CROSSWALK_REPOSITORY,
+ DNET_PROVENANCE_ACTIONS);
+
+ public static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = qualifier(
+ SYSIMPORT_CROSSWALK_ENTITYREGISTRY, SYSIMPORT_CROSSWALK_ENTITYREGISTRY,
+ DNET_PROVENANCE_ACTIONS);
+
+ public static final String UNKNOWN_REPOSITORY_ORIGINALID = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";
+ public static final KeyValue UNKNOWN_REPOSITORY = keyValue(
+ "10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository");
+
+ public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE);
+
+ public static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
+ "main title", "main title", DNET_DATACITE_TITLE);
+
+ public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
+ "alternative title", "alternative title", DNET_DATACITE_TITLE);
+
+ public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);
+
+ public static final AccessRight OPEN_ACCESS_RIGHT() {
+
+ final AccessRight result = new AccessRight();
+ result.setClassid(ACCESS_RIGHT_OPEN);
+ result.setClassname(ACCESS_RIGHT_OPEN);
+ result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
+ return result;
+ }
+
+ public static final AccessRight RESTRICTED_ACCESS_RIGHT() {
+ final AccessRight result = new AccessRight();
+ result.setClassid("RESTRICTED");
+ result.setClassname("Restricted");
+ result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
+ return result;
+ }
+
+ public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
+ return OafMapperUtils
+ .accessRight(
+ ModelConstants.UNKNOWN,
+ ModelConstants.NOT_AVAILABLE,
+ ModelConstants.DNET_ACCESS_MODES);
+ }
+
+ public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
+ return OafMapperUtils
+ .accessRight(
+ ACCESS_RIGHT_EMBARGO,
+ ACCESS_RIGHT_EMBARGO,
+ DNET_ACCESS_MODES);
+ }
+
+ public static final AccessRight CLOSED_ACCESS_RIGHT() {
+ return OafMapperUtils
+ .accessRight(
+ ACCESS_RIGHT_CLOSED,
+ "Closed Access",
+ ModelConstants.DNET_ACCESS_MODES);
+ }
+
+ private static Qualifier qualifier(
+ final String classid,
+ final String classname,
+ final String schemeid) {
+ final Qualifier q = new Qualifier();
+ q.setClassid(classid);
+ q.setClassname(classname);
+ q.setSchemeid(schemeid);
+ return q;
+ }
+
+ private static KeyValue keyValue(final String key, final String value) {
+ final KeyValue kv = new KeyValue();
+ kv.setKey(key);
+ kv.setValue(value);
+ return kv;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/AccessRightComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/AccessRightComparator.java
new file mode 100644
index 000000000..3e80bd95b
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/AccessRightComparator.java
@@ -0,0 +1,69 @@
+
+package eu.dnetlib.dhp.schema.oaf.common;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+
+public class AccessRightComparator implements Comparator {
+
+ @Override
+ public int compare(T left, T right) {
+
+ if (left == null && right == null)
+ return 0;
+ if (left == null)
+ return 1;
+ if (right == null)
+ return -1;
+
+ String lClass = left.getClassid();
+ String rClass = right.getClassid();
+
+ if (lClass.equals(rClass))
+ return 0;
+
+ if (lClass.equals("OPEN SOURCE"))
+ return -1;
+ if (rClass.equals("OPEN SOURCE"))
+ return 1;
+
+ if (lClass.equals("OPEN"))
+ return -1;
+ if (rClass.equals("OPEN"))
+ return 1;
+
+ if (lClass.equals("6MONTHS"))
+ return -1;
+ if (rClass.equals("6MONTHS"))
+ return 1;
+
+ if (lClass.equals("12MONTHS"))
+ return -1;
+ if (rClass.equals("12MONTHS"))
+ return 1;
+
+ if (lClass.equals("EMBARGO"))
+ return -1;
+ if (rClass.equals("EMBARGO"))
+ return 1;
+
+ if (lClass.equals("RESTRICTED"))
+ return -1;
+ if (rClass.equals("RESTRICTED"))
+ return 1;
+
+ if (lClass.equals("CLOSED"))
+ return -1;
+ if (rClass.equals("CLOSED"))
+ return 1;
+
+ if (lClass.equals("UNKNOWN"))
+ return -1;
+ if (rClass.equals("UNKNOWN"))
+ return 1;
+
+ // Else (but unlikely), lexicographical ordering will do.
+ return lClass.compareTo(rClass);
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/EntityType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/EntityType.java
new file mode 100644
index 000000000..81188fb11
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/EntityType.java
@@ -0,0 +1,21 @@
+
+package eu.dnetlib.dhp.schema.oaf.common;
+
+import eu.dnetlib.dhp.schema.oaf.Entity;
+
+/** Actual entity types in the Graph */
+public enum EntityType {
+ publication, dataset, otherresearchproduct, software, datasource, organization, project;
+
+ /**
+ * Resolves the EntityType, given the relative class name
+ *
+ * @param clazz the given class
+ * @param <T> the actual Entity subclass
+ * @return the EntityType associated to the given class
+ */
+ public static EntityType fromClass(Class clazz) {
+
+ return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/MainEntityType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/MainEntityType.java
new file mode 100644
index 000000000..0ed0b65fd
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/MainEntityType.java
@@ -0,0 +1,7 @@
+
+package eu.dnetlib.dhp.schema.oaf.common;
+
+/** Main entity types in the Graph */
+public enum MainEntityType {
+ result, datasource, organization, project
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
new file mode 100644
index 000000000..6d51e44d2
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/ModelSupport.java
@@ -0,0 +1,352 @@
+
+package eu.dnetlib.dhp.schema.oaf.common;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.text.ParseException;
+import java.util.*;
+import java.util.function.Function;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+
+import com.github.sisyphsu.dateparser.DateParserUtils;
+import com.google.common.collect.Maps;
+
+import eu.dnetlib.dhp.schema.oaf.*;
+
+/** Oaf model utility methods. */
+public class ModelSupport {
+
+ /** Defines the mapping between the actual entity type and the main entity type */
+ private static final Map entityMapping = Maps.newHashMap();
+
+ static {
+ entityMapping.put(EntityType.publication, MainEntityType.result);
+ entityMapping.put(EntityType.dataset, MainEntityType.result);
+ entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
+ entityMapping.put(EntityType.software, MainEntityType.result);
+ entityMapping.put(EntityType.datasource, MainEntityType.datasource);
+ entityMapping.put(EntityType.organization, MainEntityType.organization);
+ entityMapping.put(EntityType.project, MainEntityType.project);
+ }
+
+ /**
+ * Defines the mapping between the actual entity types and the relative classes implementing them
+ */
+ public static final Map entityTypes = Maps.newHashMap();
+
+ static {
+ entityTypes.put(EntityType.datasource, Datasource.class);
+ entityTypes.put(EntityType.organization, Organization.class);
+ entityTypes.put(EntityType.project, Project.class);
+ entityTypes.put(EntityType.dataset, Dataset.class);
+ entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
+ entityTypes.put(EntityType.software, Software.class);
+ entityTypes.put(EntityType.publication, Publication.class);
+ }
+
+ public static final Map oafTypes = Maps.newHashMap();
+
+ static {
+ oafTypes.put("datasource", Datasource.class);
+ oafTypes.put("organization", Organization.class);
+ oafTypes.put("project", Project.class);
+ oafTypes.put("dataset", Dataset.class);
+ oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
+ oafTypes.put("software", Software.class);
+ oafTypes.put("publication", Publication.class);
+ oafTypes.put("relation", Relation.class);
+ }
+
+ public static final Map idPrefixMap = Maps.newHashMap();
+
+ static {
+ idPrefixMap.put(Datasource.class, "10");
+ idPrefixMap.put(Organization.class, "20");
+ idPrefixMap.put(Project.class, "40");
+ idPrefixMap.put(Dataset.class, "50");
+ idPrefixMap.put(OtherResearchProduct.class, "50");
+ idPrefixMap.put(Software.class, "50");
+ idPrefixMap.put(Publication.class, "50");
+ }
+
+ public static final Map entityIdPrefix = Maps.newHashMap();
+
+ static {
+ entityIdPrefix.put("datasource", "10");
+ entityIdPrefix.put("organization", "20");
+ entityIdPrefix.put("project", "40");
+ entityIdPrefix.put("result", "50");
+ }
+
+ public static final Map idPrefixEntity = Maps.newHashMap();
+
+ static {
+ idPrefixEntity.put("10", "datasource");
+ idPrefixEntity.put("20", "organization");
+ idPrefixEntity.put("40", "project");
+ idPrefixEntity.put("50", "result");
+ }
+
+ public static String getEntityTypeFromId(final String id) {
+ //TODO We should create a class which defines the identifier and parses it
+ if (StringUtils.isBlank(id))
+ return null;
+ return idPrefixEntity.get(id.substring(0,2));
+ }
+
+ /**
+ * Helper method: combines the relation attributes
+ * @param relType
+ * @param subRelType
+ * @param relClass
+ * @return
+ */
+ public static String rel(String relType, String subRelType, String relClass) {
+ return String.format("%s_%s_%s", relType, subRelType, relClass);
+ }
+
+ /**
+ * Helper method: deserialize the relation attributes serialized with rel
+ * @param deserialization
+ * @return
+ */
+ public static RelationLabel unRel(String deserialization) {
+ final String[] s = deserialization.split("_");
+ if (s!= null && s.length==3) {
+ final Relation.RELTYPE currentRelType = Relation.RELTYPE.valueOf(s[0]);
+ final Relation.SUBRELTYPE currentSubRelType = Relation.SUBRELTYPE.valueOf(s[1]);
+ final Relation.RELCLASS currentRelClass = Relation.RELCLASS.valueOf(s[2]);
+ return new RelationLabel(currentRelClass, currentRelType, currentSubRelType);
+ }
+ throw new IllegalArgumentException("Invalid relationship format for "+ deserialization);
+ }
+
+ private static final String schemeTemplate = "dnet:%s_%s_relations";
+
+ public static final String DATE_FORMAT = "yyyy-MM-dd";
+
+ private ModelSupport() {
+ }
+
+ public static String getIdPrefix(Class clazz) {
+ return idPrefixMap.get(clazz);
+ }
+
+ public static Boolean sameClass(X left, Y right,
+ Class superClazz) {
+ return isSubClass(left, superClazz) && isSubClass(right, superClazz);
+ }
+
+ /**
+ * Checks subclass-superclass relationship.
+ *
+ * @param subClazzObject Subclass object instance
+ * @param superClazzObject Superclass object instance
+ * @param <X> Subclass type
+ * @param <Y> Superclass type
+ * @return True if X is a subclass of Y
+ */
+ public static Boolean isSubClass(
+ X subClazzObject, Y superClazzObject) {
+ return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
+ }
+
+ /**
+ * Checks subclass-superclass relationship.
+ *
+ * @param subClazzObject Subclass object instance
+ * @param superClazz Superclass class
+ * @param <X> Subclass type
+ * @param <Y> Superclass type
+ * @return True if X is a subclass of Y
+ */
+ public static Boolean isSubClass(
+ X subClazzObject, Class superClazz) {
+ return isSubClass(subClazzObject.getClass(), superClazz);
+ }
+
+ /**
+ * Checks subclass-superclass relationship.
+ *
+ * @param subClazz Subclass class
+ * @param superClazz Superclass class
+ * @param <X> Subclass type
+ * @param <Y> Superclass type
+ * @return True if X is a subclass of Y
+ */
+ public static Boolean isSubClass(
+ Class subClazz, Class superClazz) {
+ return superClazz.isAssignableFrom(subClazz);
+ }
+
+ /**
+ * Lists all the OAF model classes
+ *
+ * @param <T>
+ * @return
+ */
+ public static Class[] getOafModelClasses() {
+ return new Class[] {
+ AccessRight.class,
+ Author.class,
+ AuthorPid.class,
+ Context.class,
+ Country.class,
+ DataInfo.class,
+ Dataset.class,
+ Datasource.class,
+ Entity.class,
+ EntityDataInfo.class,
+ EoscIfGuidelines.class,
+ ExternalReference.class,
+ ExtraInfo.class,
+ GeoLocation.class,
+ H2020Classification.class,
+ H2020Programme.class,
+ Instance.class,
+ Journal.class,
+ KeyValue.class,
+ License.class,
+ Measure.class,
+ OAIProvenance.class,
+ OpenAccessRoute.class,
+ Organization.class,
+ OriginDescription.class,
+ OtherResearchProduct.class,
+ Project.class,
+ Provenance.class,
+ Publication.class,
+ Publisher.class,
+ Qualifier.class,
+ Relation.class,
+ Result.class,
+ Software.class,
+ StructuredProperty.class,
+ Subject.class
+ };
+ }
+
+ public static String getMainType(final EntityType type) {
+ return entityMapping.get(type).name();
+ }
+
+ public static boolean isResult(EntityType type) {
+ return MainEntityType.result.name().equals(getMainType(type));
+ }
+
+ public static String getScheme(final String sourceType, final String targetType) {
+ return String
+ .format(
+ schemeTemplate,
+ entityMapping.get(EntityType.valueOf(sourceType)).name(),
+ entityMapping.get(EntityType.valueOf(targetType)).name());
+ }
+
+ public static String tableIdentifier(String dbName, String tableName) {
+
+ checkArgument(StringUtils.isNotBlank(dbName), "DB name cannot be empty");
+ checkArgument(StringUtils.isNotBlank(tableName), "table name cannot be empty");
+
+ return String.format("%s.%s", dbName, tableName);
+ }
+
+ public static String tableIdentifier(String dbName, Class clazz) {
+
+ checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null");
+
+ return tableIdentifier(dbName, clazz.getSimpleName().toLowerCase());
+ }
+
+ public static Function idFn() {
+ return x -> {
+ if (isSubClass(x, Relation.class)) {
+ return idFnForRelation(x);
+ }
+ return idFnForOafEntity(x);
+ };
+ }
+
+ private static String idFnForRelation(T t) {
+ Relation r = (Relation) t;
+ return Optional
+ .ofNullable(r.getSource())
+ .map(
+ source -> Optional
+ .ofNullable(r.getTarget())
+ .map(
+ target -> Optional
+ .ofNullable(r.getRelType())
+ .map(
+ relType -> Optional
+ .ofNullable(r.getSubRelType())
+ .map(
+ subRelType -> Optional
+ .ofNullable(r.getRelClass())
+ .map(
+ relClass -> String
+ .join(
+ source,
+ target,
+ relType.toString(),
+ subRelType.toString(),
+ relClass.toString()))
+ .orElse(
+ String
+ .join(
+ source,
+ target,
+ relType.toString(),
+ subRelType.toString())))
+ .orElse(String.join(source, target, relType.toString())))
+ .orElse(String.join(source, target)))
+ .orElse(source))
+ .orElse(null);
+ }
+
+ private static String idFnForOafEntity(T t) {
+ return ((Entity) t).getId();
+ }
+
+ public static String md5(final String s) {
+ try {
+ final MessageDigest md = MessageDigest.getInstance("MD5");
+ md.update(s.getBytes(StandardCharsets.UTF_8));
+ return new String(Hex.encodeHex(md.digest()));
+ } catch (final NoSuchAlgorithmException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ public static String generateIdentifier(final String originalId, final String nsPrefix) {
+ return String.format("%s::%s", nsPrefix, md5(originalId));
+ }
+
+ public static String oldest(String dateA, String dateB) throws ParseException {
+
+ if (StringUtils.isBlank(dateA)) {
+ return dateB;
+ }
+ if (StringUtils.isBlank(dateB)) {
+ return dateA;
+ }
+ if (StringUtils.isNotBlank(dateA) && StringUtils.isNotBlank(dateB)) {
+
+ final Date a = DateParserUtils.parseDate(dateA);
+ final Date b = DateParserUtils.parseDate(dateB);
+
+ if (Objects.nonNull(a) && Objects.nonNull(b)) {
+ return a.before(b) ? dateA : dateB;
+ } else {
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/RefereedComparator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/RefereedComparator.java
new file mode 100644
index 000000000..75e29e176
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/RefereedComparator.java
@@ -0,0 +1,45 @@
+
+package eu.dnetlib.dhp.schema.oaf.common;
+
+import java.util.Comparator;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+
+public class RefereedComparator implements Comparator {
+
+ @Override
+ public int compare(Qualifier left, Qualifier right) {
+
+ if (left == null && right == null)
+ return 0;
+ if (left == null)
+ return 1;
+ if (right == null)
+ return -1;
+
+ String lClass = left.getClassid();
+ String rClass = right.getClassid();
+
+ if (lClass.equals(rClass))
+ return 0;
+
+ if (lClass.equals(ModelConstants.PEER_REVIEWED_CLASSID))
+ return -1;
+ if (rClass.equals(ModelConstants.PEER_REVIEWED_CLASSID))
+ return 1;
+
+ if (lClass.equals(ModelConstants.NON_PEER_REVIEWED_CLASSID))
+ return -1;
+ if (rClass.equals(ModelConstants.NON_PEER_REVIEWED_CLASSID))
+ return 1;
+
+ if (lClass.equals(ModelConstants.UNKNOWN))
+ return -1;
+ if (rClass.equals(ModelConstants.UNKNOWN))
+ return 1;
+
+ // Else (but unlikely), lexicographical ordering will do.
+ return lClass.compareTo(rClass);
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/RelationLabel.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/RelationLabel.java
new file mode 100644
index 000000000..45f46b737
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/common/RelationLabel.java
@@ -0,0 +1,33 @@
+
+package eu.dnetlib.dhp.schema.oaf.common;
+
+import eu.dnetlib.dhp.schema.oaf.Relation;
+
+public class RelationLabel {
+ private final Relation.RELCLASS relClass;
+ private final Relation.RELTYPE relType;
+ private final Relation.SUBRELTYPE subReltype;
+
+ public RelationLabel(Relation.RELCLASS relClass, Relation.RELTYPE relType, Relation.SUBRELTYPE subReltype) {
+ this.relClass = relClass;
+ this.relType = relType;
+ this.subReltype = subReltype;
+
+ }
+
+ public RelationLabel inverse() {
+ return new RelationLabel(relClass.getInverse(), relType, subReltype);
+ }
+
+ public Relation.RELTYPE getRelType() {
+ return relType;
+ }
+
+ public Relation.SUBRELTYPE getSubReltype() {
+ return subReltype;
+ }
+
+ public Relation.RELCLASS getRelClass() {
+ return relClass;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
new file mode 100644
index 000000000..352cdad47
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/CleaningFunctions.java
@@ -0,0 +1,101 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+
+import org.apache.commons.lang3.StringUtils;
+
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
+import lombok.val;
+
+public class CleaningFunctions {
+
+ public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
+
+ private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)";
+ public static final String DOI_PREFIX = "10.";
+
+ public static final Set PID_BLACKLIST = new HashSet<>();
+
+ static {
+ PID_BLACKLIST.add("none");
+ PID_BLACKLIST.add("na");
+ }
+
+ public CleaningFunctions() {
+ }
+
+ /**
+ * Utility method that filters PID values on a per-type basis.
+ * @param s the PID whose value will be checked.
+ * @return false if the pid matches the filter criteria, true otherwise.
+ */
+ public static boolean pidFilter(StructuredProperty s) {
+ final String pidValue = s.getValue();
+ if (Objects.isNull(s.getQualifier()) ||
+ StringUtils.isBlank(pidValue) ||
+ StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
+ return false;
+ }
+ if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
+ return false;
+ }
+ return !PidBlacklistProvider.getBlacklist(s.getQualifier().getClassid()).contains(pidValue);
+ }
+
+ /**
+ * Utility method that normalises PID values on a per-type basis.
+ * @param pid the PID whose value will be normalised.
+ * @return the PID containing the normalised value.
+ */
+ public static StructuredProperty normalizePidValue(StructuredProperty pid) {
+ pid
+ .setValue(
+ normalizePidValue(
+ pid.getQualifier().getClassid(),
+ pid.getValue()));
+
+ return pid;
+ }
+
+ /**
+ * This utility was moved from DOIBoost,
+ * it implements a better cleaning of DOI.
+ * In case of a wrong DOI it raises an IllegalArgumentException
+ * @param input DOI
+ * @return normalized DOI
+ */
+ private static String normalizeDOI(final String input) {
+ if (input == null)
+ throw new IllegalArgumentException("PID value cannot be empty");
+ final String replaced = input
+ .replaceAll(ALL_SPACES_REGEX, "")
+ .toLowerCase()
+ .replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
+ if (StringUtils.isEmpty(replaced.trim()))
+ throw new IllegalArgumentException("PID value normalized return empty string");
+ if (!replaced.contains("10."))
+ throw new IllegalArgumentException("DOI Must starts with 10.");
+ return replaced.substring(replaced.indexOf("10."));
+ }
+
+ public static String normalizePidValue(String pidType, String pidValue) {
+ String value = Optional
+ .ofNullable(pidValue)
+ .map(String::trim)
+ .orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
+
+ switch (pidType) {
+
+ // TODO add cleaning for more PID types as needed
+ case ModelConstants.DOI:
+ return normalizeDOI(value.toLowerCase());
+ }
+ return value;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
index fc515b5b1..3aec286b2 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java
@@ -16,14 +16,16 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions {
@@ -38,6 +40,127 @@ public class GraphCleaningFunctions extends CleaningFunctions {
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
+ public static T cleanContext(T value, String contextId, String verifyParam) {
+ if (ModelSupport.isSubClass(value, Result.class)) {
+ final Result res = (Result) value;
+ if (shouldCleanContext(res, verifyParam)) {
+ res
+ .setContext(
+ res
+ .getContext()
+ .stream()
+ .filter(c -> !StringUtils.startsWith(c.getId().toLowerCase(), contextId))
+ .collect(Collectors.toList()));
+ }
+ return (T) res;
+ } else {
+ return value;
+ }
+ }
+
+ private static boolean shouldCleanContext(Result res, String verifyParam) {
+ boolean titleMatch = res
+ .getTitle()
+ .stream()
+ .filter(
+ t -> t
+ .getQualifier()
+ .getClassid()
+ .equalsIgnoreCase(ModelConstants.MAIN_TITLE_QUALIFIER.getClassid()))
+ .anyMatch(t -> t.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()));
+
+ return titleMatch && Objects.nonNull(res.getContext());
+ }
+
+ public static T cleanCountry(T value, String[] verifyParam, Set hostedBy,
+ String collectedfrom, String country) {
+ if (ModelSupport.isSubClass(value, Result.class)) {
+ final Result res = (Result) value;
+ if (res.getInstance().stream().anyMatch(i -> hostedBy.contains(i.getHostedby().getKey())) ||
+ !res.getCollectedfrom().stream().anyMatch(cf -> cf.getValue().equals(collectedfrom))) {
+ return (T) res;
+ }
+
+ List ids = getPidsAndAltIds(res).collect(Collectors.toList());
+ if (ids
+ .stream()
+ .anyMatch(
+ p -> p
+ .getQualifier()
+ .getClassid()
+ .equals(PidType.doi.toString()) && pidInParam(p.getValue(), verifyParam))) {
+ res
+ .setCountry(
+ res
+ .getCountry()
+ .stream()
+ .filter(
+ c -> toTakeCountry(c, country))
+ .collect(Collectors.toList()));
+ }
+
+ return (T) res;
+ } else {
+ return value;
+ }
+ }
+
+ private static Stream getPidsAndAltIds(T r) {
+ final Stream resultPids = Optional
+ .ofNullable(r.getPid())
+ .map(Collection::stream)
+ .orElse(Stream.empty());
+
+ final Stream instancePids = Optional
+ .ofNullable(r.getInstance())
+ .map(
+ instance -> instance
+ .stream()
+ .flatMap(
+ i -> Optional
+ .ofNullable(i.getPid())
+ .map(Collection::stream)
+ .orElse(Stream.empty())))
+ .orElse(Stream.empty());
+
+ final Stream instanceAltIds = Optional
+ .ofNullable(r.getInstance())
+ .map(
+ instance -> instance
+ .stream()
+ .flatMap(
+ i -> Optional
+ .ofNullable(i.getAlternateIdentifier())
+ .map(Collection::stream)
+ .orElse(Stream.empty())))
+ .orElse(Stream.empty());
+
+ return Stream
+ .concat(
+ Stream.concat(resultPids, instancePids),
+ instanceAltIds);
+ }
+
+ private static boolean pidInParam(String value, String[] verifyParam) {
+ for (String s : verifyParam)
+ if (value.startsWith(s))
+ return true;
+ return false;
+ }
+
+ private static boolean toTakeCountry(Country c, String country) {
+ // If dataInfo is not set, or dataInfo.inferenceprovenance is not set or not present then it cannot be
+ // inserted via propagation
+ if (!Optional.ofNullable(c.getDataInfo()).isPresent())
+ return true;
+ if (!Optional.ofNullable(c.getDataInfo().getInferenceprovenance()).isPresent())
+ return true;
+ return !(c
+ .getClassid()
+ .equalsIgnoreCase(country) &&
+ c.getDataInfo().getInferenceprovenance().equals("propagation"));
+ }
+
public static T fixVocabularyNames(T value) {
if (value instanceof Datasource) {
// nothing to clean here
@@ -91,48 +214,31 @@ public class GraphCleaningFunctions extends CleaningFunctions {
}
public static boolean filter(T value) {
- if (Boolean.TRUE
- .equals(
- Optional
- .ofNullable(value)
- .map(
- o -> Optional
- .ofNullable(o.getDataInfo())
- .map(
- d -> Optional
- .ofNullable(d.getInvisible())
- .orElse(true))
- .orElse(true))
- .orElse(true))) {
- return true;
- }
-
- if (value instanceof Datasource) {
- // nothing to evaluate here
- } else if (value instanceof Project) {
- // nothing to evaluate here
- } else if (value instanceof Organization) {
- // nothing to evaluate here
- } else if (value instanceof Relation) {
- // nothing to clean here
- } else if (value instanceof Result) {
-
- Result r = (Result) value;
-
- if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) {
- return false;
- }
-
- if (value instanceof Publication) {
-
- } else if (value instanceof Dataset) {
-
- } else if (value instanceof OtherResearchProduct) {
-
- } else if (value instanceof Software) {
+ if (value instanceof Entity) {
+ Entity entity = (Entity) value;
+ if (Boolean.TRUE
+ .equals(
+ Optional
+ .ofNullable(entity)
+ .map(
+ o -> Optional
+ .ofNullable(o.getDataInfo())
+ .map(
+ d -> Optional
+ .ofNullable(d.getInvisible())
+ .orElse(true))
+ .orElse(true))
+ .orElse(true))) {
+ return true;
+ } else if (value instanceof Result) {
+ Result r = (Result) value;
+ if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) {
+ return false;
+ }
}
}
+
return true;
}
@@ -164,7 +270,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.nonNull(r.getDateofacceptance())) {
Optional date = cleanDateField(r.getDateofacceptance());
if (date.isPresent()) {
- r.getDateofacceptance().setValue(date.get());
+ r.setDateofacceptance(date.get());
} else {
r.setDateofacceptance(null);
}
@@ -185,7 +291,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.collect(Collectors.toList()));
}
- if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
+ if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getName())) {
r.setPublisher(null);
}
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
@@ -267,7 +373,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.getDescription()
.stream()
.filter(Objects::nonNull)
- .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
+ .filter(s -> StringUtils.isNotBlank(s))
.map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList()));
}
@@ -288,29 +394,25 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.setInstancetype(
OafMapperUtils
.qualifier(
- "0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE,
- ModelConstants.DNET_PUBLICATION_RESOURCE));
+ "0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Dataset) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
- "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE,
- ModelConstants.DNET_PUBLICATION_RESOURCE));
+ "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Software) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
- "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE,
- ModelConstants.DNET_PUBLICATION_RESOURCE));
+ "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof OtherResearchProduct) {
i
.setInstancetype(
OafMapperUtils
.qualifier(
- "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE,
- ModelConstants.DNET_PUBLICATION_RESOURCE));
+ "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE));
}
}
@@ -348,7 +450,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.nonNull(i.getDateofacceptance())) {
Optional date = cleanDateField(i.getDateofacceptance());
if (date.isPresent()) {
- i.getDateofacceptance().setValue(date.get());
+ i.setDateofacceptance(date.get());
} else {
i.setDateofacceptance(null);
}
@@ -456,10 +558,9 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return value;
}
- private static Optional cleanDateField(Field dateofacceptance) {
+ private static Optional cleanDateField(String dateofacceptance) {
return Optional
.ofNullable(dateofacceptance)
- .map(Field::getValue)
.map(GraphCleaningFunctions::cleanDate)
.filter(Objects::nonNull);
}
@@ -513,20 +614,17 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static void fixVocabName(Qualifier q, String vocabularyName) {
if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
q.setSchemeid(vocabularyName);
- q.setSchemename(vocabularyName);
}
}
private static AccessRight accessRight(String classid, String classname, String scheme) {
return OafMapperUtils
.accessRight(
- classid, classname, scheme, scheme);
+ classid, classname, scheme);
}
private static Qualifier qualifier(String classid, String classname, String scheme) {
- return OafMapperUtils
- .qualifier(
- classid, classname, scheme, scheme);
+ return OafMapperUtils.qualifier(classid, classname, scheme);
}
protected static StructuredProperty cleanValue(StructuredProperty s) {
@@ -539,9 +637,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s;
}
- protected static Field cleanValue(Field s) {
- s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
- return s;
+ protected static String cleanValue(String s) {
+ return s.replaceAll(CLEANING_REGEX, " ");
}
}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
new file mode 100644
index 000000000..0db1e1b63
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/IdentifierFactory.java
@@ -0,0 +1,317 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.HashBiMap;
+import com.google.common.collect.Maps;
+
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
+
+/**
+ * Factory class for OpenAIRE identifiers in the Graph
+ */
+public class IdentifierFactory implements Serializable {
+
+ public static final String ID_SEPARATOR = "::";
+ public static final String ID_PREFIX_SEPARATOR = "|";
+
+ public static final int ID_PREFIX_LEN = 12;
+
+ /**
+ * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE.
+ * The id of the record (source_::id) will be rewritten as (pidType_::id)
+ */
+ public static final Map> PID_AUTHORITY = Maps.newHashMap();
+
+ static {
+ PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
+ PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
+ PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
+ PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
+ PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
+
+ PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
+ PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
+ PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
+
+ PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
+ PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
+ PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
+
+ PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
+ PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
+
+ PID_AUTHORITY.put(PidType.w3id, HashBiMap.create());
+ PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ROHub");
+ }
+
+ /**
+ * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
+ * PID_TYPE. Example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
+ *
+ * If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins. E.g. Zenodo records win over those from Datacite.
+ * See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
+ */
+ public static final Map> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
+
+ static {
+ DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
+ DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
+ DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
+ DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>());
+ DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id");
+ }
+
+ /**
+ * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
+ * Their OpenAIRE ID is built from the declared PID type. They are merged with their corresponding record, identified by
+ * the same OpenAIRE id.
+ */
+ public static final Map> ENRICHMENT_PROVIDER = Maps.newHashMap();
+
+ static {
+ ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
+ ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
+ }
+
+ public static Set delegatedAuthorityDatasourceIds() {
+ return DELEGATED_PID_AUTHORITY
+ .values()
+ .stream()
+ .flatMap(m -> m.keySet().stream())
+ .collect(Collectors.toCollection(HashSet::new));
+ }
+
+ public static List getPids(List pid, KeyValue collectedFrom) {
+ return pidFromInstance(pid, collectedFrom, true).distinct().collect(Collectors.toList());
+ }
+
+ public static String createDOIBoostIdentifier(T entity) {
+ if (entity == null)
+ return null;
+
+ StructuredProperty pid = null;
+ if (entity.getPid() != null) {
+ pid = entity
+ .getPid()
+ .stream()
+ .filter(Objects::nonNull)
+ .filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
+ .filter(CleaningFunctions::pidFilter)
+ .findAny()
+ .orElse(null);
+ } else {
+ if (entity.getInstance() != null) {
+ pid = entity
+ .getInstance()
+ .stream()
+ .filter(i -> i.getPid() != null)
+ .flatMap(i -> i.getPid().stream())
+ .filter(CleaningFunctions::pidFilter)
+ .findAny()
+ .orElse(null);
+ }
+ }
+ if (pid != null)
+ return idFromPid(entity, pid, true);
+ return null;
+ }
+
+ /**
+ * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
+ * entity T. Returns entity.id when none of the PIDs meets the selection criteria.
+ *
+ * @param entity the entity providing PIDs and a default ID.
+ * @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
+ * @param md5 indicates whether the PID value should be hashed or not.
+ * @return an identifier from the most relevant PID, entity.id otherwise
+ */
+ public static String createIdentifier(T entity, boolean md5) {
+
+ checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");
+
+ final Map> pids = extractPids(entity);
+
+ return pids
+ .values()
+ .stream()
+ .flatMap(Set::stream)
+ .min(new PidComparator<>(entity))
+ .map(
+ min -> Optional
+ .ofNullable(pids.get(min.getQualifier().getClassid()))
+ .map(
+ p -> p
+ .stream()
+ .sorted(new PidValueComparator())
+ .findFirst()
+ .map(s -> idFromPid(entity, s, md5))
+ .orElseGet(entity::getId))
+ .orElseGet(entity::getId))
+ .orElseGet(entity::getId);
+ }
+
+ private static Map> extractPids(T entity) {
+ if (entity instanceof Result) {
+ return Optional
+ .ofNullable(((Result) entity).getInstance())
+ .map(IdentifierFactory::mapPids)
+ .orElse(new HashMap<>());
+ } else {
+ return entity
+ .getPid()
+ .stream()
+ .map(CleaningFunctions::normalizePidValue)
+ .filter(CleaningFunctions::pidFilter)
+ .collect(
+ Collectors
+ .groupingBy(
+ p -> p.getQualifier().getClassid(),
+ Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
+ }
+ }
+
+ private static Map> mapPids(List instance) {
+ return instance
+ .stream()
+ .map(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
+ .flatMap(Function.identity())
+ .collect(
+ Collectors
+ .groupingBy(
+ p -> p.getQualifier().getClassid(),
+ Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
+ }
+
+ private static Stream pidFromInstance(List pid, KeyValue collectedFrom,
+ boolean mapHandles) {
+ return Optional
+ .ofNullable(pid)
+ .map(
+ pp -> pp
+ .stream()
+ // filter away PIDs provided by a DS that is not considered an authority for the
+ // given PID Type
+ .filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
+ .map(CleaningFunctions::normalizePidValue)
+ .filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
+ .filter(CleaningFunctions::pidFilter))
+ .orElse(Stream.empty());
+ }
+
+ private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
+ final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
+
+ if (Objects.isNull(collectedFrom)) {
+ return false;
+ }
+
+ boolean isEnrich = Optional
+ .ofNullable(ENRICHMENT_PROVIDER.get(pType))
+ .map(
+ enrich -> enrich.containsKey(collectedFrom.getKey())
+ || enrich.containsValue(collectedFrom.getValue()))
+ .orElse(false);
+
+ boolean isAuthority = Optional
+ .ofNullable(PID_AUTHORITY.get(pType))
+ .map(
+ authorities -> authorities.containsKey(collectedFrom.getKey())
+ || authorities.containsValue(collectedFrom.getValue()))
+ .orElse(false);
+
+ return (mapHandles && pType.equals(PidType.handle)) || isEnrich || isAuthority;
+ }
+
+ private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
+ final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
+
+ final Map da = DELEGATED_PID_AUTHORITY.get(pType);
+ if (Objects.isNull(da)) {
+ return true;
+ }
+ if (!da.containsKey(collectedFrom.getKey())) {
+ return true;
+ }
+ return StringUtils.contains(p.getValue(), da.get(collectedFrom.getKey()));
+ }
+
+ /**
+ * @see IdentifierFactory#createIdentifier(Entity, boolean)
+ */
+ public static String createIdentifier(T entity) {
+
+ return createIdentifier(entity, true);
+ }
+
+ private static String idFromPid(T entity, StructuredProperty s, boolean md5) {
+ return idFromPid(ModelSupport.getIdPrefix(entity.getClass()), s.getQualifier().getClassid(), s.getValue(), md5);
+ }
+
+ public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
+ return new StringBuilder()
+ .append(numericPrefix)
+ .append(ID_PREFIX_SEPARATOR)
+ .append(createPrefix(pidType))
+ .append(ID_SEPARATOR)
+ .append(md5 ? ModelSupport.md5(pidValue) : pidValue)
+ .toString();
+ }
+
+ // create the prefix (length = 12)
+ private static String createPrefix(String pidType) {
+ StringBuilder prefix = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
+ while (prefix.length() < ID_PREFIX_LEN) {
+ prefix.append("_");
+ }
+ return prefix.substring(0, ID_PREFIX_LEN);
+ }
+
+ public static String createOpenaireId(
+ final int prefix,
+ final String originalId,
+ final boolean to_md5) {
+ if (StringUtils.isBlank(originalId)) {
+ return null;
+ } else if (to_md5) {
+ final String nsPrefix = StringUtils.substringBefore(originalId, "::");
+ final String rest = StringUtils.substringAfter(originalId, "::");
+ return String.format("%s|%s::%s", prefix, nsPrefix, ModelSupport.md5(rest));
+ } else {
+ return String.format("%s|%s", prefix, originalId);
+ }
+ }
+
+ public static String createOpenaireId(
+ final String type,
+ final String originalId,
+ final boolean to_md5) {
+ switch (type) {
+ case "datasource":
+ return createOpenaireId(10, originalId, to_md5);
+ case "organization":
+ return createOpenaireId(20, originalId, to_md5);
+ case "person":
+ return createOpenaireId(30, originalId, to_md5);
+ case "project":
+ return createOpenaireId(40, originalId, to_md5);
+ default:
+ return createOpenaireId(50, originalId, to_md5);
+ }
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeBeanUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeBeanUtils.java
new file mode 100644
index 000000000..a318f991c
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeBeanUtils.java
@@ -0,0 +1,104 @@
+
+package eu.dnetlib.dhp.schema.oaf.utils;
+
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.beanutils.BeanUtilsBean;
+
+public class MergeBeanUtils {
+
+ /**
+ * Copies all properties from sources to destination; null values are not copied, and any nested objects will be
+ * either cloned or copied into the existing object. This is recursive and should not cause infinite recursion.
+ * @param dest object to copy props into (will mutate)
+ * @param sources objects to copy properties from, applied in order
+ * @param <T> the type of the beans being merged
+ * @return the mutated dest instance
+ * @throws IllegalAccessException
+ * @throws InvocationTargetException
+ */
+ public static T mergeIn(T dest, T... sources) {
+ // to keep from any chance infinite recursion lets limit each object to 1 instance at a time in the stack
+ final List