Compare commits

...

39 Commits

Author SHA1 Message Date
Alessia Bardi af1f8af788 Updated officialnmae of pangaea in hostedbymap for Datacite to avoid duplicate entries in the source filter of the portal 2023-06-07 10:29:47 +02:00
Sandro La Bruzzo 883dcd910e Make the tests compilable 2023-05-10 09:05:23 +02:00
Sandro La Bruzzo 706631586b Now it compiles 2023-05-09 13:54:57 +02:00
Sandro La Bruzzo ba380f5826 Makes the code compilable 2023-05-09 12:23:42 +02:00
Sandro La Bruzzo 88fffa6dbd - Removed ModelConstants and update Relation enum 2023-05-03 14:03:01 +02:00
Sandro La Bruzzo f8f4b9a018 -Renamed RelationInverse into RelationLabel.
-Removed findRelation from ModelSupport
-code formatted
2023-04-28 11:44:28 +02:00
Claudio Atzori 8c9a77d7eb WIP merged from graph_cleaning_refactoring, applying model simplification 2023-04-26 16:02:06 +02:00
Claudio Atzori cdb5aad82b merge from beta 2023-04-21 08:47:47 +02:00
Claudio Atzori 864f4051d3 [graph cleaning] added missing case 2023-04-05 11:35:47 +02:00
Claudio Atzori dead87917f [graph cleaning] cleanup 2023-04-04 13:13:43 +02:00
Claudio Atzori 2a6ba29b64 [graph cleaning] unit tests & cleanup 2023-04-04 12:34:51 +02:00
Claudio Atzori b502f86523 fixed input path supplemented to GetDatasourceFromCountry; adjusted the various spark.sql.shuffle.partitions 2023-03-24 13:09:12 +01:00
Claudio Atzori c07857fa37 [graph cleaning] unit tests & cleanup 2023-03-23 15:57:47 +01:00
Claudio Atzori 90e61a8aba [graph cleaning] WIP: refactoring of the cleaning stages, unit tests 2023-03-23 15:03:26 +01:00
Claudio Atzori 488d9a5eaa [graph cleaning] WIP: refactoring of the cleaning stages, unit tests 2023-03-23 10:41:13 +01:00
Claudio Atzori 4f5ba0ed52 [graph cleaning] WIP: refactoring of the cleaning stages, unit tests 2023-03-21 14:41:20 +01:00
Claudio Atzori 6d3d18d8b5 [graph cleaning] WIP: refactoring of the cleaning stages 2023-03-16 17:23:36 +01:00
Claudio Atzori c3dff7e199 Merge branch 'beta' into ticket_8369 2023-03-14 15:24:03 +01:00
Claudio Atzori 2a914e4463 merged from beta. It compiles 2023-03-10 16:00:48 +01:00
Claudio Atzori 74c40fd4e2 Merge branch 'ticket_8369' of https://code-repo.d4science.org/D-Net/dnet-hadoop into ticket_8369 2023-02-17 10:59:40 +01:00
Miriam Baglioni eca7ebab6d [enrichment] adding relations in one side only 2023-02-15 16:41:28 +01:00
Miriam Baglioni 624c62f62d [enrichment] changed to make it compile against the new model 2023-02-15 16:20:24 +01:00
Miriam Baglioni 8ddcf10075 resolved conflicts 2023-02-15 15:30:17 +01:00
Claudio Atzori 56495cc727 Merge branch 'ticket_8369' of https://code-repo.d4science.org/D-Net/dnet-hadoop into ticket_8369 2023-02-15 11:39:36 +01:00
Sandro La Bruzzo 8af8b2ea27 Now Crossref mapping and dhp-aggregation compile 2023-02-14 17:20:13 +01:00
Claudio Atzori 6b0a08e29c Merge branch 'ticket_8369' of https://code-repo.d4science.org/D-Net/dnet-hadoop into ticket_8369 2023-02-14 15:59:18 +01:00
Sandro La Bruzzo 990e3e2f60 code refactor 2023-02-14 10:32:17 +01:00
Sandro La Bruzzo 8f777af827 increased version number of dhp-code-style 2023-02-14 10:30:34 +01:00
Claudio Atzori 63c5c5848d wip: large refactoring 2023-02-13 16:15:19 +01:00
Miriam Baglioni 125657ed4c - 2023-02-13 12:40:14 +01:00
Claudio Atzori 508648e1d8 Merge branch 'ticket_8369' of https://code-repo.d4science.org/D-Net/dnet-hadoop into ticket_8369 2023-02-09 16:27:46 +01:00
Claudio Atzori d04610480a wip: large refactoring 2023-02-09 16:27:41 +01:00
Sandro La Bruzzo 6bd5a792dd Added vocabolary mocks 2023-02-09 16:13:34 +01:00
Sandro La Bruzzo 04b12a35cd Moved cressref Utility to dhp-aggregation 2023-02-09 16:11:59 +01:00
Claudio Atzori 934c1846f8 wip: large refactoring 2023-02-09 12:32:28 +01:00
Sandro La Bruzzo 606cada7a4 trying to adapt to the new data model 2023-02-07 11:11:05 +01:00
Claudio Atzori d9c9482a5b WIP: refactoring the internal graph data model and its utilities 2023-02-06 13:45:21 +01:00
Claudio Atzori 67735f7e9d WIP: refactoring model utilities 2023-02-02 17:02:23 +01:00
Claudio Atzori 1845dcfedf WIP: refactoring the internal graph data model and its utilities 2023-02-01 16:24:35 +01:00
262 changed files with 7918 additions and 5764 deletions

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId> <artifactId>dhp-build</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-build-assembly-resources</artifactId> <artifactId>dhp-build-assembly-resources</artifactId>

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId> <artifactId>dhp-build</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId> <artifactId>dhp-build-properties-maven-plugin</artifactId>

View File

@ -5,7 +5,7 @@
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-code-style</artifactId> <artifactId>dhp-code-style</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
<packaging>jar</packaging> <packaging>jar</packaging>

View File

@ -4,7 +4,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-build</artifactId> <artifactId>dhp-build</artifactId>
<packaging>pom</packaging> <packaging>pom</packaging>

View File

@ -5,7 +5,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId> <artifactId>dhp</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.common.action; package eu.dnetlib.dhp.common.action;
import static eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory.*;
import java.io.BufferedWriter; import java.io.BufferedWriter;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
@ -19,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.action.model.MasterDuplicate; import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class ReadDatasourceMasterDuplicateFromDB { public class ReadDatasourceMasterDuplicateFromDB {
@ -59,8 +60,8 @@ public class ReadDatasourceMasterDuplicateFromDB {
final String masterId = rs.getString("masterId"); final String masterId = rs.getString("masterId");
final String masterName = rs.getString("masterName"); final String masterName = rs.getString("masterName");
md.setDuplicateId(OafMapperUtils.createOpenaireId(10, duplicateId, true)); md.setDuplicateId(createOpenaireId(10, duplicateId, true));
md.setMasterId(OafMapperUtils.createOpenaireId(10, masterId, true)); md.setMasterId(createOpenaireId(10, masterId, true));
md.setMasterName(masterName); md.setMasterName(masterName);
return md; return md;

View File

@ -9,13 +9,13 @@ import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders; import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType; import org.apache.http.entity.ContentType;
import org.jetbrains.annotations.NotNull;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModelList;
import okhttp3.*; import okhttp3.*;
import org.jetbrains.annotations.NotNull;
public class ZenodoAPIClient implements Serializable { public class ZenodoAPIClient implements Serializable {
@ -80,7 +80,7 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
conn.disconnect(); conn.disconnect();
if(!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body); throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class); ZenodoModel newSubmission = new Gson().fromJson(body, ZenodoModel.class);
@ -115,7 +115,7 @@ public class ZenodoAPIClient implements Serializable {
} }
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
if(! checkOKStatus(responseCode)){ if (!checkOKStatus(responseCode)) {
throw new IOException("Unexpected code " + responseCode + getBody(conn)); throw new IOException("Unexpected code " + responseCode + getBody(conn));
} }
@ -126,7 +126,7 @@ public class ZenodoAPIClient implements Serializable {
private String getBody(HttpURLConnection conn) throws IOException { private String getBody(HttpURLConnection conn) throws IOException {
String body = "{}"; String body = "{}";
try (BufferedReader br = new BufferedReader( try (BufferedReader br = new BufferedReader(
new InputStreamReader(conn.getInputStream(), "utf-8"))) { new InputStreamReader(conn.getInputStream(), "utf-8"))) {
StringBuilder response = new StringBuilder(); StringBuilder response = new StringBuilder();
String responseLine = null; String responseLine = null;
while ((responseLine = br.readLine()) != null) { while ((responseLine = br.readLine()) != null) {
@ -155,7 +155,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true); conn.setDoOutput(true);
conn.setRequestMethod("PUT"); conn.setRequestMethod("PUT");
try (OutputStream os = conn.getOutputStream()) { try (OutputStream os = conn.getOutputStream()) {
byte[] input = metadata.getBytes("utf-8"); byte[] input = metadata.getBytes("utf-8");
os.write(input, 0, input.length); os.write(input, 0, input.length);
@ -164,19 +163,18 @@ public class ZenodoAPIClient implements Serializable {
final int responseCode = conn.getResponseCode(); final int responseCode = conn.getResponseCode();
conn.disconnect(); conn.disconnect();
if(!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + getBody(conn)); throw new IOException("Unexpected code " + responseCode + getBody(conn));
return responseCode; return responseCode;
} }
private boolean checkOKStatus(int responseCode) { private boolean checkOKStatus(int responseCode) {
if(HttpURLConnection.HTTP_OK != responseCode || if (HttpURLConnection.HTTP_OK != responseCode ||
HttpURLConnection.HTTP_CREATED != responseCode) HttpURLConnection.HTTP_CREATED != responseCode)
return true ; return true;
return false; return false;
} }
@ -233,7 +231,6 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true); conn.setDoOutput(true);
conn.setRequestMethod("POST"); conn.setRequestMethod("POST");
try (OutputStream os = conn.getOutputStream()) { try (OutputStream os = conn.getOutputStream()) {
byte[] input = json.getBytes("utf-8"); byte[] input = json.getBytes("utf-8");
os.write(input, 0, input.length); os.write(input, 0, input.length);
@ -245,7 +242,7 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
conn.disconnect(); conn.disconnect();
if(!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body); throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
@ -290,13 +287,12 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
conn.disconnect(); conn.disconnect();
if(!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body); throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
bucket = zenodoModel.getLinks().getBucket(); bucket = zenodoModel.getLinks().getBucket();
return responseCode; return responseCode;
} }
@ -331,22 +327,16 @@ public class ZenodoAPIClient implements Serializable {
conn.setDoOutput(true); conn.setDoOutput(true);
conn.setRequestMethod("GET"); conn.setRequestMethod("GET");
String body = getBody(conn); String body = getBody(conn);
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
conn.disconnect(); conn.disconnect();
if(!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body); throw new IOException("Unexpected code " + responseCode + body);
return body; return body;
} }
private String getBucket(String inputUurl) throws IOException { private String getBucket(String inputUurl) throws IOException {
@ -363,15 +353,13 @@ public class ZenodoAPIClient implements Serializable {
int responseCode = conn.getResponseCode(); int responseCode = conn.getResponseCode();
conn.disconnect(); conn.disconnect();
if(!checkOKStatus(responseCode)) if (!checkOKStatus(responseCode))
throw new IOException("Unexpected code " + responseCode + body); throw new IOException("Unexpected code " + responseCode + body);
ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class); ZenodoModel zenodoModel = new Gson().fromJson(body, ZenodoModel.class);
return zenodoModel.getLinks().getBucket(); return zenodoModel.getLinks().getBucket();
} }
} }

View File

@ -73,11 +73,11 @@ public class Vocabulary implements Serializable {
public Qualifier getTermAsQualifier(final String termId, boolean strict) { public Qualifier getTermAsQualifier(final String termId, boolean strict) {
final VocabularyTerm term = getTerm(termId); final VocabularyTerm term = getTerm(termId);
if (Objects.nonNull(term)) { if (Objects.nonNull(term)) {
return OafMapperUtils.qualifier(term.getId(), term.getName(), getId(), getName()); return OafMapperUtils.qualifier(term.getId(), term.getName(), getId());
} else if (Objects.isNull(term) && strict) { } else if (Objects.isNull(term) && strict) {
return OafMapperUtils.unknown(getId(), getName()); return OafMapperUtils.unknown(getId());
} else { } else {
return OafMapperUtils.qualifier(termId, termId, getId(), getName()); return OafMapperUtils.qualifier(termId, termId, getId());
} }
} }

View File

@ -125,12 +125,12 @@ public class VocabularyGroup implements Serializable {
if (vocabularyExists(vocId)) { if (vocabularyExists(vocId)) {
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id); return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
} }
return OafMapperUtils.qualifier(id, id, "", ""); return OafMapperUtils.qualifier(id, id, "");
} }
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) { public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) { if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", ""); return OafMapperUtils.unknown("");
} }
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn); return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
} }
@ -142,7 +142,7 @@ public class VocabularyGroup implements Serializable {
*/ */
public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) { public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) { if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", ""); return OafMapperUtils.unknown("");
} }
return vocs.get(vocId).getSynonymAsQualifier(syn); return vocs.get(vocId).getSynonymAsQualifier(syn);
} }

View File

@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils;
import com.wcohen.ss.JaroWinkler; import com.wcohen.ss.JaroWinkler;
import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.AuthorPid;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.model.Person; import eu.dnetlib.pace.model.Person;
import scala.Tuple2; import scala.Tuple2;
@ -75,7 +76,7 @@ public class AuthorMerger {
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1)); .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
// <pid, Author> (list of pid that are missing in the other list) // <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich final List<Tuple2<AuthorPid, Author>> pidToEnrich = enrich
.stream() .stream()
.filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
@ -111,7 +112,7 @@ public class AuthorMerger {
// TERRIBLE HACK but for some reason when we create and Array with Arrays.asList, // TERRIBLE HACK but for some reason when we create and Array with Arrays.asList,
// it creates of fixed size, and the add method raise UnsupportedOperationException at // it creates of fixed size, and the add method raise UnsupportedOperationException at
// java.util.AbstractList.add // java.util.AbstractList.add
final List<StructuredProperty> tmp = new ArrayList<>(r.getPid()); final List<AuthorPid> tmp = new ArrayList<>(r.getPid());
tmp.add(a._1()); tmp.add(a._1());
r.setPid(tmp); r.setPid(tmp);
} }
@ -120,10 +121,12 @@ public class AuthorMerger {
} }
public static String pidToComparableString(StructuredProperty pid) { public static String pidToComparableString(StructuredProperty pid) {
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() return pid.toComparableString();
: ""; /*
return (pid.getQualifier() != null ? classid : "") * final String classid = pid.getQualifier().getClassid() != null ?
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); * pid.getQualifier().getClassid().toLowerCase() : ""; return (pid.getQualifier() != null ? classid : "") +
* (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
*/
} }
public static int countAuthorsPids(List<Author> authors) { public static int countAuthorsPids(List<Author> authors) {

View File

@ -21,8 +21,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
public class DispatchEntitiesSparkJob { public class DispatchEntitiesSparkJob {
@ -58,7 +58,7 @@ public class DispatchEntitiesSparkJob {
log.info("graphTableClassName: {}", graphTableClassName); log.info("graphTableClassName: {}", graphTableClassName);
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName); Class<? extends Entity> entityClazz = (Class<? extends Entity>) Class.forName(graphTableClassName);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
runWithSparkSession( runWithSparkSession(

View File

@ -30,9 +30,9 @@ import com.jayway.jsonpath.Option;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import scala.Tuple2; import scala.Tuple2;
/** /**
@ -87,17 +87,17 @@ public class GroupEntitiesSparkJob {
String inputPath, String inputPath,
String outputPath) { String outputPath) {
final TypedColumn<OafEntity, OafEntity> aggregator = new GroupingAggregator().toColumn(); final TypedColumn<Entity, Entity> aggregator = new GroupingAggregator().toColumn();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
spark spark
.read() .read()
.textFile(toSeq(listEntityPaths(inputPath, sc))) .textFile(toSeq(listEntityPaths(inputPath, sc)))
.map((MapFunction<String, OafEntity>) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) .map((MapFunction<String, Entity>) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(Entity.class))
.filter((FilterFunction<OafEntity>) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) .filter((FilterFunction<Entity>) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e)))
.groupByKey((MapFunction<OafEntity, String>) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) .groupByKey((MapFunction<Entity, String>) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING())
.agg(aggregator) .agg(aggregator)
.map( .map(
(MapFunction<Tuple2<String, OafEntity>, String>) t -> t._2().getClass().getName() + (MapFunction<Tuple2<String, Entity>, String>) t -> t._2().getClass().getName() +
"|" + OBJECT_MAPPER.writeValueAsString(t._2()), "|" + OBJECT_MAPPER.writeValueAsString(t._2()),
Encoders.STRING()) Encoders.STRING())
.write() .write()
@ -106,48 +106,48 @@ public class GroupEntitiesSparkJob {
.text(outputPath); .text(outputPath);
} }
public static class GroupingAggregator extends Aggregator<OafEntity, OafEntity, OafEntity> { public static class GroupingAggregator extends Aggregator<Entity, Entity, Entity> {
@Override @Override
public OafEntity zero() { public Entity zero() {
return null; return null;
} }
@Override @Override
public OafEntity reduce(OafEntity b, OafEntity a) { public Entity reduce(Entity b, Entity a) {
return mergeAndGet(b, a); return mergeAndGet(b, a);
} }
private OafEntity mergeAndGet(OafEntity b, OafEntity a) { private Entity mergeAndGet(Entity b, Entity a) {
if (Objects.nonNull(a) && Objects.nonNull(b)) { if (Objects.nonNull(a) && Objects.nonNull(b)) {
return OafMapperUtils.mergeEntities(b, a); return MergeUtils.merge(b, a, true);
} }
return Objects.isNull(a) ? b : a; return Objects.isNull(a) ? b : a;
} }
@Override @Override
public OafEntity merge(OafEntity b, OafEntity a) { public Entity merge(Entity b, Entity a) {
return mergeAndGet(b, a); return mergeAndGet(b, a);
} }
@Override @Override
public OafEntity finish(OafEntity j) { public Entity finish(Entity j) {
return j; return j;
} }
@Override @Override
public Encoder<OafEntity> bufferEncoder() { public Encoder<Entity> bufferEncoder() {
return Encoders.kryo(OafEntity.class); return Encoders.kryo(Entity.class);
} }
@Override @Override
public Encoder<OafEntity> outputEncoder() { public Encoder<Entity> outputEncoder() {
return Encoders.kryo(OafEntity.class); return Encoders.kryo(Entity.class);
} }
} }
private static OafEntity parseOaf(String s) { private static Entity parseOaf(String s) {
DocumentContext dc = JsonPath DocumentContext dc = JsonPath
.parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS)); .parse(s, Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS));
@ -184,7 +184,7 @@ public class GroupEntitiesSparkJob {
} }
} }
private static <T extends OafEntity> OafEntity parse(String s, Class<T> clazz) { private static <T extends Entity> Entity parse(String s, Class<T> clazz) {
try { try {
return OBJECT_MAPPER.readValue(s, clazz); return OBJECT_MAPPER.readValue(s, clazz);
} catch (IOException e) { } catch (IOException e) {

View File

@ -0,0 +1,172 @@
package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.AccessRight;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
/**
 * Holder of the constants shared across the graph model: well-known datasource identifiers
 * and names, vocabulary (scheme) identifiers, provenance action ids and a few pre-built
 * {@link Qualifier}, {@link KeyValue} and {@link AccessRight} values.
 *
 * The class only exposes static members; the private constructor prevents instantiation.
 */
public class ModelConstants {

    private ModelConstants() {
    }

    public static final String DOI = "doi";
    public static final String ORCID = "orcid";
    public static final String ORCID_PENDING = "orcid_pending";
    public static final String ORCID_CLASSNAME = "Open Researcher and Contributor ID";
    // Locale.ROOT keeps the result "ORCID" whatever the JVM default locale is
    // (a Turkish default locale would otherwise upper-case 'i' to a dotted capital I).
    public static final String ORCID_DS = ORCID.toUpperCase(java.util.Locale.ROOT);

    public static final String CROSSREF_ID = "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2";

    public static final String CROSSREF_NAME = "Crossref";
    public static final String DATACITE_ID = "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254";

    public static final String ZENODO_OD_ID = "10|opendoar____::358aee4cc897452c00244351e4d91f69";
    public static final String ZENODO_R3_ID = "10|re3data_____::7b0ad08687b2c960d5aeef06f811d5e6";

    public static final String EUROPE_PUBMED_CENTRAL_ID = "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c";
    public static final String PUBMED_CENTRAL_ID = "10|opendoar____::eda80a3d5b344bc40f3bc04f65b7a357";
    public static final String ARXIV_ID = "10|opendoar____::6f4922f45568161a8cdf4ad2299f6d23";
    public static final String ROHUB_ID = "10|fairsharing_::1b69ebedb522700034547abc5652ffac";

    public static final String OPENORGS_NAME = "OpenOrgs Database";

    public static final String OPENOCITATIONS_NAME = "OpenCitations";
    public static final String OPENOCITATIONS_ID = "10|openaire____::c06df618c5de1c786535ccf3f8b7b059";

    public static final String OPEN_APC_NAME = "OpenAPC Global Initiative";
    public static final String OPEN_APC_ID = "10|apc_________::e2b1600b229fc30663c8a1f662debddf";

    // VOCABULARY VALUE
    public static final String ACCESS_RIGHT_OPEN = "OPEN";
    public static final String ACCESS_RIGHT_EMBARGO = "EMBARGO";
    public static final String ACCESS_RIGHT_CLOSED = "CLOSED";

    public static final String DNET_SUBJECT_KEYWORD = "keyword";

    public static final String DNET_SUBJECT_FOS_CLASSID = "FOS";

    public static final String DNET_SUBJECT_FOS_CLASSNAME = "Fields of Science and Technology classification";

    public static final String DNET_SUBJECT_TYPOLOGIES = "dnet:subject_classification_typologies";
    public static final String DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
    public static final String DNET_PUBLICATION_RESOURCE = "dnet:publication_resource";
    public static final String DNET_ACCESS_MODES = "dnet:access_modes";
    public static final String DNET_LANGUAGES = "dnet:languages";
    public static final String DNET_PID_TYPES = "dnet:pid_types";
    public static final String DNET_DATACITE_DATE = "dnet:dataCite_date";
    public static final String DNET_DATACITE_TITLE = "dnet:dataCite_title";
    public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
    public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
    public static final String DNET_COUNTRY_TYPE = "dnet:countries";
    public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
    public static final String DNET_PROGRAMMING_LANGUAGES = "dnet:programming_languages";
    public static final String DNET_EXTERNAL_REFERENCE_TYPE = "dnet:externalReference_typologies";

    public static final String DNET_RELATION_RELTYPE = "dnet:relation_relType";
    public static final String DNET_RELATION_SUBRELTYPE = "dnet:relation_subRelType";
    public static final String DNET_RELATION_RELCLASS = "dnet:relation_relClass";

    // Was "nonPeerReviewed", i.e. identical to NON_PEER_REVIEWED_CLASSNAME (copy-paste slip).
    // NOTE(review): "peerReviewed" is presumably the term name used by the
    // dnet:review_levels vocabulary -- verify against the vocabulary.
    public static final String PEER_REVIEWED_CLASSNAME = "peerReviewed";
    public static final String NON_PEER_REVIEWED_CLASSNAME = "nonPeerReviewed";
    public static final String PEER_REVIEWED_CLASSID = "0001";
    public static final String NON_PEER_REVIEWED_CLASSID = "0002";

    public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
    public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
    public static final String SYSIMPORT_ACTIONSET = "sysimport:actionset";
    public static final String SYSIMPORT_ORCID_NO_DOI = "sysimport:actionset:orcidworks-no-doi";

    public static final String USER_CLAIM = "user:claim";
    public static final String HARVESTED = "Harvested";

    public static final String PROVENANCE_DEDUP = "sysimport:dedup";
    public static final String PROVENANCE_ENRICH = "sysimport:enrich";

    public static final Qualifier PROVENANCE_ACTION_SET_QUALIFIER = qualifier(
        SYSIMPORT_ACTIONSET, SYSIMPORT_ACTIONSET, DNET_PROVENANCE_ACTIONS);

    public static final String UNKNOWN = "UNKNOWN";
    public static final String NOT_AVAILABLE = "not available";

    public static final Qualifier REPOSITORY_PROVENANCE_ACTIONS = qualifier(
        SYSIMPORT_CROSSWALK_REPOSITORY, SYSIMPORT_CROSSWALK_REPOSITORY,
        DNET_PROVENANCE_ACTIONS);

    public static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = qualifier(
        SYSIMPORT_CROSSWALK_ENTITYREGISTRY, SYSIMPORT_CROSSWALK_ENTITYREGISTRY,
        DNET_PROVENANCE_ACTIONS);

    public static final String UNKNOWN_REPOSITORY_ORIGINALID = "openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18";

    public static final KeyValue UNKNOWN_REPOSITORY = keyValue(
        "10|openaire____::55045bd2a65019fd8e6741a755395c8c", "Unknown Repository");

    public static final Qualifier UNKNOWN_COUNTRY = qualifier(UNKNOWN, "Unknown", DNET_COUNTRY_TYPE);

    public static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
        "main title", "main title", DNET_DATACITE_TITLE);

    public static final Qualifier ALTERNATIVE_TITLE_QUALIFIER = qualifier(
        "alternative title", "alternative title", DNET_DATACITE_TITLE);

    public static final Qualifier SUBTITLE_QUALIFIER = qualifier("subtitle", "subtitle", DNET_DATACITE_TITLE);

    /**
     * Builds a new access right whose class id and class name are both
     * {@link #ACCESS_RIGHT_OPEN}, in the {@link #DNET_ACCESS_MODES} scheme.
     *
     * The original code called {@code setClassid} twice, leaving the class name unset.
     *
     * @return a fresh {@link AccessRight} instance on every call
     */
    public static final AccessRight OPEN_ACCESS_RIGHT() {
        final AccessRight result = new AccessRight();
        result.setClassid(ACCESS_RIGHT_OPEN);
        result.setClassname(ACCESS_RIGHT_OPEN);
        result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
        return result;
    }

    /**
     * Builds a new access right with class id "RESTRICTED" and class name "Restricted",
     * in the {@link #DNET_ACCESS_MODES} scheme.
     *
     * @return a fresh {@link AccessRight} instance on every call
     */
    public static final AccessRight RESTRICTED_ACCESS_RIGHT() {
        final AccessRight result = new AccessRight();
        result.setClassid("RESTRICTED");
        result.setClassname("Restricted");
        result.setSchemeid(ModelConstants.DNET_ACCESS_MODES);
        return result;
    }

    /**
     * Delegates to {@code OafMapperUtils.accessRight} with {@link #UNKNOWN},
     * {@link #NOT_AVAILABLE} and {@link #DNET_ACCESS_MODES}.
     *
     * @return the access right produced by {@code OafMapperUtils.accessRight}
     */
    public static final AccessRight UNKNOWN_ACCESS_RIGHT() {
        return OafMapperUtils
            .accessRight(
                ModelConstants.UNKNOWN,
                ModelConstants.NOT_AVAILABLE,
                ModelConstants.DNET_ACCESS_MODES);
    }

    /**
     * Delegates to {@code OafMapperUtils.accessRight} passing {@link #ACCESS_RIGHT_EMBARGO}
     * twice, followed by {@link #DNET_ACCESS_MODES}.
     *
     * @return the access right produced by {@code OafMapperUtils.accessRight}
     */
    public static final AccessRight EMBARGOED_ACCESS_RIGHT() {
        return OafMapperUtils
            .accessRight(
                ACCESS_RIGHT_EMBARGO,
                ACCESS_RIGHT_EMBARGO,
                DNET_ACCESS_MODES);
    }

    /**
     * Delegates to {@code OafMapperUtils.accessRight} with {@link #ACCESS_RIGHT_CLOSED},
     * "Closed Access" and {@link #DNET_ACCESS_MODES}.
     *
     * @return the access right produced by {@code OafMapperUtils.accessRight}
     */
    public static final AccessRight CLOSED_ACCESS_RIGHT() {
        return OafMapperUtils
            .accessRight(
                ACCESS_RIGHT_CLOSED,
                "Closed Access",
                ModelConstants.DNET_ACCESS_MODES);
    }

    /** Creates a {@link Qualifier} populated with the given class id, class name and scheme id. */
    private static Qualifier qualifier(
        final String classid,
        final String classname,
        final String schemeid) {
        final Qualifier q = new Qualifier();
        q.setClassid(classid);
        q.setClassname(classname);
        q.setSchemeid(schemeid);
        return q;
    }

    /** Creates a {@link KeyValue} populated with the given key and value. */
    private static KeyValue keyValue(final String key, final String value) {
        final KeyValue kv = new KeyValue();
        kv.setKey(key);
        kv.setValue(value);
        return kv;
    }
}

View File

@ -0,0 +1,69 @@
package eu.dnetlib.dhp.schema.oaf.common;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
/**
 * Compares qualifiers by their class id using a fixed ranking of known access-right ids;
 * a qualifier whose class id appears earlier in the ranking sorts first.
 *
 * Null qualifiers sort after non-null ones, and two nulls compare as equal. Class ids that
 * are not in the ranking fall back to their natural {@link String} ordering.
 */
public class AccessRightComparator<T extends Qualifier> implements Comparator<T> {

    /** Known class ids, listed from the one that sorts first to the one that sorts last. */
    private static final String[] RANKED_CLASS_IDS = {
        "OPEN SOURCE",
        "OPEN",
        "6MONTHS",
        "12MONTHS",
        "EMBARGO",
        "RESTRICTED",
        "CLOSED",
        "UNKNOWN"
    };

    @Override
    public int compare(T left, T right) {
        if (left == null) {
            return right == null ? 0 : 1;
        }
        if (right == null) {
            return -1;
        }

        final String leftId = left.getClassid();
        final String rightId = right.getClassid();

        if (leftId.equals(rightId)) {
            return 0;
        }

        // Walk the ranking from best to worst: whichever side matches first wins.
        for (final String rankedId : RANKED_CLASS_IDS) {
            if (leftId.equals(rankedId)) {
                return -1;
            }
            if (rightId.equals(rankedId)) {
                return 1;
            }
        }

        // Else (but unlikely), lexicographical ordering will do.
        return leftId.compareTo(rightId);
    }
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.schema.oaf.common;
import eu.dnetlib.dhp.schema.oaf.Entity;
/** Actual entity types in the Graph */
public enum EntityType {

	publication, dataset, otherresearchproduct, software, datasource, organization, project;

	/**
	 * Resolves the EntityType from a model class, by lower-casing the simple name of the class.
	 *
	 * @param clazz the entity class
	 * @param <T> actual Entity subclass
	 * @return the EntityType whose name equals the lower-cased simple name of the given class
	 * @throws IllegalArgumentException if no constant matches the lower-cased simple class name
	 */
	public static <T extends Entity> EntityType fromClass(Class<T> clazz) {
		// Locale.ROOT keeps the lookup independent from the default locale of the JVM.
		return EntityType.valueOf(clazz.getSimpleName().toLowerCase(java.util.Locale.ROOT));
	}
}

View File

@ -0,0 +1,7 @@
package eu.dnetlib.dhp.schema.oaf.common;
/**
 * Main entity types in the Graph.
 * The declaration order of the constants is part of the contract (ordinals).
 */
public enum MainEntityType {
	result,
	datasource,
	organization,
	project;
}

View File

@ -0,0 +1,352 @@
package eu.dnetlib.dhp.schema.oaf.common;
import static com.google.common.base.Preconditions.checkArgument;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.util.*;
import java.util.function.Function;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
/** Oaf model utility methods. */
public class ModelSupport {
/**
 * Defines the mapping between the actual entity type and the main entity type.
 * The four result subtypes (publication, dataset, otherresearchproduct, software) all map to
 * MainEntityType.result; every other entity type maps to the main type bearing the same name.
 */
private static final Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
static {
entityMapping.put(EntityType.publication, MainEntityType.result);
entityMapping.put(EntityType.dataset, MainEntityType.result);
entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
entityMapping.put(EntityType.software, MainEntityType.result);
entityMapping.put(EntityType.datasource, MainEntityType.datasource);
entityMapping.put(EntityType.organization, MainEntityType.organization);
entityMapping.put(EntityType.project, MainEntityType.project);
}
/**
 * Defines the mapping between the actual entity types and the relative classes implementing them.
 * The map is public and mutable, and one entry is registered for each EntityType constant.
 */
public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
static {
entityTypes.put(EntityType.datasource, Datasource.class);
entityTypes.put(EntityType.organization, Organization.class);
entityTypes.put(EntityType.project, Project.class);
entityTypes.put(EntityType.dataset, Dataset.class);
entityTypes.put(EntityType.otherresearchproduct, OtherResearchProduct.class);
entityTypes.put(EntityType.software, Software.class);
entityTypes.put(EntityType.publication, Publication.class);
}
/**
 * Defines the mapping between the lower-case type names and the classes implementing them.
 * It holds the same classes registered in entityTypes, keyed by name, plus the "relation" entry.
 */
public static final Map<String, Class> oafTypes = Maps.newHashMap();
static {
oafTypes.put("datasource", Datasource.class);
oafTypes.put("organization", Organization.class);
oafTypes.put("project", Project.class);
oafTypes.put("dataset", Dataset.class);
oafTypes.put("otherresearchproduct", OtherResearchProduct.class);
oafTypes.put("software", Software.class);
oafTypes.put("publication", Publication.class);
oafTypes.put("relation", Relation.class);
}
/**
 * Defines the two-character identifier prefix associated with each entity class.
 * The four result classes (Dataset, OtherResearchProduct, Software, Publication) share the prefix "50".
 */
public static final Map<Class, String> idPrefixMap = Maps.newHashMap();
static {
idPrefixMap.put(Datasource.class, "10");
idPrefixMap.put(Organization.class, "20");
idPrefixMap.put(Project.class, "40");
idPrefixMap.put(Dataset.class, "50");
idPrefixMap.put(OtherResearchProduct.class, "50");
idPrefixMap.put(Software.class, "50");
idPrefixMap.put(Publication.class, "50");
}
/** Defines the mapping from the main entity type name to its two-character identifier prefix. */
public static final Map<String, String> entityIdPrefix = Maps.newHashMap();
static {
entityIdPrefix.put("datasource", "10");
entityIdPrefix.put("organization", "20");
entityIdPrefix.put("project", "40");
entityIdPrefix.put("result", "50");
}
/**
 * Defines the mapping from the two-character identifier prefix to the main entity type name.
 * It holds the same four pairs stored in entityIdPrefix, with keys and values swapped.
 */
public static final Map<String, String> idPrefixEntity = Maps.newHashMap();
static {
idPrefixEntity.put("10", "datasource");
idPrefixEntity.put("20", "organization");
idPrefixEntity.put("40", "project");
idPrefixEntity.put("50", "result");
}
/**
 * Resolves the main entity type name from the first two characters of an identifier.
 *
 * @param id the identifier, expected to start with one of the prefixes registered in idPrefixEntity
 * @return the main entity type name, or null when the id is blank, shorter than two characters,
 *         or starts with a prefix that is not registered
 */
public static String getEntityTypeFromId(final String id) {
	// TODO We should create a class which define the identifier and parse it
	// The length check avoids the StringIndexOutOfBoundsException raised by substring on one-character ids.
	if (StringUtils.isBlank(id) || id.length() < 2)
		return null;
	return idPrefixEntity.get(id.substring(0, 2));
}
/**
 * Helper method: serializes the relation attributes into a single string, joining them with
 * an underscore in the order relType, subRelType, relClass.
 *
 * @param relType the relation type
 * @param subRelType the relation sub-type
 * @param relClass the relation class
 * @return the three attributes separated by '_'
 */
public static String rel(String relType, String subRelType, String relClass) {
	return relType + "_" + subRelType + "_" + relClass;
}
/**
 * Helper method: parses the relation attributes serialized with rel.
 *
 * @param deserialization the serialized form, made of three tokens separated by '_'
 * @return the RelationLabel built from the parsed relType, subRelType and relClass
 * @throws IllegalArgumentException when the input does not split into exactly three tokens,
 *         or when a token does not match a constant of the corresponding enum
 */
public static RelationLabel unRel(String deserialization) {
	final String[] tokens = deserialization.split("_");
	if (tokens.length != 3) {
		throw new IllegalArgumentException("Invalid relationship format for "+ deserialization);
	}
	final Relation.RELTYPE parsedRelType = Relation.RELTYPE.valueOf(tokens[0]);
	final Relation.SUBRELTYPE parsedSubRelType = Relation.SUBRELTYPE.valueOf(tokens[1]);
	final Relation.RELCLASS parsedRelClass = Relation.RELCLASS.valueOf(tokens[2]);
	return new RelationLabel(parsedRelClass, parsedRelType, parsedSubRelType);
}
/** Format used by getScheme: the two placeholders receive the source and target main entity type names. */
private static final String schemeTemplate = "dnet:%s_%s_relations";
/** Date pattern "yyyy-MM-dd". */
public static final String DATE_FORMAT = "yyyy-MM-dd";
/** Private constructor: the class only exposes static members and is not meant to be instantiated. */
private ModelSupport() {
}
/**
 * Looks up the identifier prefix registered for the given entity class.
 *
 * @param clazz the entity class
 * @param <E> the entity type
 * @return the prefix found in idPrefixMap, or null when the class is not registered
 */
public static <E extends Entity> String getIdPrefix(Class<E> clazz) {
	final String prefix = idPrefixMap.get(clazz);
	return prefix;
}
/**
 * Checks whether both objects are instances of the given superclass.
 *
 * @param left the first object
 * @param right the second object
 * @param superClazz the class both objects are checked against
 * @return true when both left and right are assignable to superClazz
 */
public static <X extends Oaf, Y extends Oaf, Z extends Oaf> Boolean sameClass(X left, Y right,
	Class<Z> superClazz) {
	if (!isSubClass(left, superClazz)) {
		return false;
	}
	return isSubClass(right, superClazz);
}
/**
 * Checks the subclass-superclass relationship between the runtime classes of two objects.
 *
 * @param subClazzObject instance of the candidate subclass
 * @param superClazzObject instance of the candidate superclass
 * @param <X> Subclass type
 * @param <Y> Superclass type
 * @return True if the class of the first object is assignable to the class of the second one
 */
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
	X subClazzObject, Y superClazzObject) {
	final Class<? extends Oaf> candidate = subClazzObject.getClass();
	final Class<? extends Oaf> parent = superClazzObject.getClass();
	return isSubClass(candidate, parent);
}
/**
 * Checks the subclass-superclass relationship between the runtime class of an object and a class.
 *
 * @param subClazzObject instance of the candidate subclass
 * @param superClazz the candidate superclass
 * @param <X> Subclass type
 * @param <Y> Superclass type
 * @return True if the class of the object is assignable to superClazz
 */
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
	X subClazzObject, Class<Y> superClazz) {
	final Class<? extends Oaf> candidate = subClazzObject.getClass();
	return isSubClass(candidate, superClazz);
}
/**
 * Checks the subclass-superclass relationship between two classes.
 *
 * @param subClazz the candidate subclass
 * @param superClazz the candidate superclass
 * @param <X> Subclass type
 * @param <Y> Superclass type
 * @return True if subClazz is the same as, or a subclass of, superClazz
 */
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
	Class<X> subClazz, Class<Y> superClazz) {
	final boolean assignable = superClazz.isAssignableFrom(subClazz);
	return assignable;
}
/**
 * Lists the classes of the OAF model.
 *
 * @param <T> element type of the returned array as seen by the caller; the array itself is
 *            created as a raw Class[]
 * @return a newly allocated array holding the model classes
 */
public static <T extends Entity> Class<T>[] getOafModelClasses() {
	final Class[] modelClasses = {
		AccessRight.class,
		Author.class,
		AuthorPid.class,
		Context.class,
		Country.class,
		DataInfo.class,
		Dataset.class,
		Datasource.class,
		Entity.class,
		EntityDataInfo.class,
		EoscIfGuidelines.class,
		ExternalReference.class,
		ExtraInfo.class,
		GeoLocation.class,
		H2020Classification.class,
		H2020Programme.class,
		Instance.class,
		Journal.class,
		KeyValue.class,
		License.class,
		Measure.class,
		OAIProvenance.class,
		OpenAccessRoute.class,
		Organization.class,
		OriginDescription.class,
		OtherResearchProduct.class,
		Project.class,
		Provenance.class,
		Publication.class,
		Publisher.class,
		Qualifier.class,
		Relation.class,
		Result.class,
		Software.class,
		StructuredProperty.class,
		Subject.class
	};
	return modelClasses;
}
/**
 * Resolves the name of the main entity type a given entity type belongs to.
 *
 * @param type the actual entity type
 * @return the name of the MainEntityType registered in entityMapping for the given type
 */
public static String getMainType(final EntityType type) {
	final MainEntityType mainType = entityMapping.get(type);
	return mainType.name();
}
/**
 * Tells whether the given entity type is one of the result subtypes.
 *
 * @param type the actual entity type
 * @return true when the main type of the given entity type is MainEntityType.result
 */
public static boolean isResult(EntityType type) {
	final String mainType = getMainType(type);
	final String resultName = MainEntityType.result.name();
	return resultName.equals(mainType);
}
/**
 * Builds the name of the relation scheme for a pair of entity types, filling schemeTemplate
 * with the main entity types of source and target.
 *
 * @param sourceType the name of the source EntityType
 * @param targetType the name of the target EntityType
 * @return the scheme name
 */
public static String getScheme(final String sourceType, final String targetType) {
	final String sourceMainType = entityMapping.get(EntityType.valueOf(sourceType)).name();
	final String targetMainType = entityMapping.get(EntityType.valueOf(targetType)).name();
	return String.format(schemeTemplate, sourceMainType, targetMainType);
}
/**
 * Builds a table identifier in the form dbName.tableName.
 *
 * @param dbName the database name, must not be blank
 * @param tableName the table name, must not be blank
 * @return the two names separated by a dot
 * @throws IllegalArgumentException when one of the two names is blank
 */
public static String tableIdentifier(String dbName, String tableName) {
	checkArgument(StringUtils.isNotBlank(dbName), "DB name cannot be empty");
	checkArgument(StringUtils.isNotBlank(tableName), "table name cannot be empty");

	return dbName + "." + tableName;
}
/**
 * Builds a table identifier in the form dbName.tableName, where the table name is the
 * lower-cased simple name of the given class.
 *
 * @param dbName the database name, must not be blank
 * @param clazz the class the table name is derived from, must not be null
 * @param <T> the model type
 * @return the database name and the derived table name separated by a dot
 * @throws IllegalArgumentException when clazz is null or dbName is blank
 */
public static <T extends Oaf> String tableIdentifier(String dbName, Class<T> clazz) {
	checkArgument(Objects.nonNull(clazz), "clazz is needed to derive the table name, thus cannot be null");

	final String tableName = clazz.getSimpleName().toLowerCase();
	return tableIdentifier(dbName, tableName);
}
/**
 * Provides the function extracting the identifier of an Oaf object: relations are handled by
 * idFnForRelation, any other object by idFnForOafEntity.
 *
 * @param <T> the model type
 * @return the identifier extraction function
 */
public static <T extends Oaf> Function<T, String> idFn() {
	return oaf -> isSubClass(oaf, Relation.class) ? idFnForRelation(oaf) : idFnForOafEntity(oaf);
}
/**
 * Builds the identifier of a relation from source, target, relType, subRelType and relClass.
 * <p>
 * The attributes are visited in that order and joined with '_'; the first null attribute stops
 * the visit, so the identifier is made of the leading non-null attributes only.
 * <p>
 * The previous implementation passed the source as first argument of String.join, where it acts
 * as the delimiter rather than as an element: as a consequence a relation with only source and
 * target was identified by the target alone.
 *
 * @param t the relation
 * @param <T> the model type, must be a Relation at runtime
 * @return the identifier, or null when the source is null
 */
private static <T extends Oaf> String idFnForRelation(T t) {
	final Relation r = (Relation) t;
	final Object[] attributes = {
		r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()
	};
	if (attributes[0] == null) {
		return null;
	}
	final StringJoiner id = new StringJoiner("_");
	for (final Object attribute : attributes) {
		if (attribute == null) {
			// the remaining attributes are ignored, as in the original nested Optional chain
			break;
		}
		id.add(attribute.toString());
	}
	return id.toString();
}
/**
 * Extracts the identifier of an entity.
 *
 * @param t the object, must be an Entity at runtime
 * @param <T> the model type
 * @return the value of Entity.getId()
 */
private static <T extends Oaf> String idFnForOafEntity(T t) {
	final Entity entity = (Entity) t;
	return entity.getId();
}
/**
 * Computes the MD5 digest of the UTF-8 bytes of a string.
 *
 * @param s the input string
 * @return the digest encoded as a hexadecimal string
 * @throws IllegalStateException when the MD5 algorithm is not available
 */
public static String md5(final String s) {
	final MessageDigest digest;
	try {
		digest = MessageDigest.getInstance("MD5");
	} catch (final NoSuchAlgorithmException e) {
		throw new IllegalStateException(e);
	}
	final byte[] hash = digest.digest(s.getBytes(StandardCharsets.UTF_8));
	return new String(Hex.encodeHex(hash));
}
/**
 * Builds an identifier in the form nsPrefix::md5(originalId).
 *
 * @param originalId the original identifier, hashed with md5
 * @param nsPrefix the namespace prefix
 * @return the namespace prefix and the hash separated by "::"
 */
public static String generateIdentifier(final String originalId, final String nsPrefix) {
	final String hashedId = md5(originalId);
	return nsPrefix + "::" + hashedId;
}
/**
 * Picks the oldest of two dates expressed as strings.
 *
 * @param dateA the first date
 * @param dateB the second date
 * @return dateB when dateA is blank; dateA when dateB is blank; otherwise dateA when it parses to
 *         a date strictly before dateB, dateB in any other case; null when either parsed date is null
 * @throws ParseException declared for the callers, see the signature
 */
public static String oldest(String dateA, String dateB) throws ParseException {
	if (StringUtils.isBlank(dateA)) {
		return dateB;
	}
	if (StringUtils.isBlank(dateB)) {
		return dateA;
	}

	// from here on both strings are known to be non-blank
	final Date parsedA = DateParserUtils.parseDate(dateA);
	final Date parsedB = DateParserUtils.parseDate(dateB);
	if (Objects.isNull(parsedA) || Objects.isNull(parsedB)) {
		return null;
	}
	return parsedA.before(parsedB) ? dateA : dateB;
}
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.schema.oaf.common;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
/**
 * Orders "refereed" qualifiers by class id: peer reviewed first, then non peer reviewed, then
 * unknown, then any other class id in lexicographical order. Null qualifiers, and qualifiers
 * without a class id, sort last.
 */
public class RefereedComparator implements Comparator<Qualifier> {

	/** Class ids in decreasing order of preference: the earlier the entry, the "smaller" the qualifier. */
	private static final String[] PRECEDENCE = {
		ModelConstants.PEER_REVIEWED_CLASSID,
		ModelConstants.NON_PEER_REVIEWED_CLASSID,
		ModelConstants.UNKNOWN
	};

	/**
	 * @param left the first qualifier, may be null
	 * @param right the second qualifier, may be null
	 * @return 0 when the class ids are equal, a negative value when left is preferred,
	 *         a positive value when right is preferred
	 */
	@Override
	public int compare(Qualifier left, Qualifier right) {
		if (left == null && right == null)
			return 0;
		if (left == null)
			return 1;
		if (right == null)
			return -1;

		final String lClass = left.getClassid();
		final String rClass = right.getClassid();

		// A missing class id used to raise a NullPointerException: treat it like a missing qualifier.
		if (lClass == null || rClass == null) {
			if (lClass == null && rClass == null)
				return 0;
			return lClass == null ? 1 : -1;
		}

		if (lClass.equals(rClass))
			return 0;

		final int lRank = rank(lClass);
		final int rRank = rank(rClass);
		if (lRank != rRank)
			return lRank < rRank ? -1 : 1;

		// Both class ids are unlisted (but unlikely): lexicographical ordering will do.
		return lClass.compareTo(rClass);
	}

	/** Returns the position of the class id in PRECEDENCE, or PRECEDENCE.length when it is not listed. */
	private static int rank(final String classid) {
		for (int i = 0; i < PRECEDENCE.length; i++) {
			if (PRECEDENCE[i].equals(classid))
				return i;
		}
		return PRECEDENCE.length;
	}
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.schema.oaf.common;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
 * Immutable triple describing a relation: its type, its sub-type and its class.
 */
public class RelationLabel {

	private final Relation.RELTYPE relType;

	private final Relation.SUBRELTYPE subReltype;

	private final Relation.RELCLASS relClass;

	/**
	 * @param relClass the relation class
	 * @param relType the relation type
	 * @param subReltype the relation sub-type
	 */
	public RelationLabel(Relation.RELCLASS relClass, Relation.RELTYPE relType, Relation.SUBRELTYPE subReltype) {
		this.relType = relType;
		this.subReltype = subReltype;
		this.relClass = relClass;
	}

	/** @return the relation class */
	public Relation.RELCLASS getRelClass() {
		return this.relClass;
	}

	/** @return the relation type */
	public Relation.RELTYPE getRelType() {
		return this.relType;
	}

	/** @return the relation sub-type */
	public Relation.SUBRELTYPE getSubReltype() {
		return this.subReltype;
	}

	/**
	 * @return a new label with the same type and sub-type, and the inverse of the relation class
	 */
	public RelationLabel inverse() {
		final Relation.RELCLASS inverseClass = this.relClass.getInverse();
		return new RelationLabel(inverseClass, this.relType, this.subReltype);
	}
}

View File

@ -0,0 +1,101 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import lombok.val;
public class CleaningFunctions {
/** Matches the DOI prefix "10." either at the beginning of the string or right after a slash. */
public static final String DOI_PREFIX_REGEX = "(^10\\.|\\/10\\.)";
/** Matches a single whitespace character (newline, carriage return, tab or any other \s character). */
private static final String ALL_SPACES_REGEX = "(?:\\n|\\r|\\t|\\s)";
/** The literal prefix "10.". */
public static final String DOI_PREFIX = "10.";
/** PID values rejected by pidFilter regardless of the PID type; the lookup is an exact match. */
public static final Set<String> PID_BLACKLIST = new HashSet<>();
static {
PID_BLACKLIST.add("none");
PID_BLACKLIST.add("na");
}
/** Public no-argument constructor, kept accessible for subclasses. */
public CleaningFunctions() {
}
/**
 * Utility method that filters PID values on a per-type basis.
 * A PID is rejected when it has no qualifier, when its value is blank, when the value is listed
 * in PID_BLACKLIST, or when it is listed in the blacklist provided for its PID type.
 *
 * @param s the PID whose value will be checked.
 * @return false if the pid matches the filter criteria, true otherwise.
 */
public static boolean pidFilter(StructuredProperty s) {
	final String pidValue = s.getValue();
	if (Objects.isNull(s.getQualifier())) {
		return false;
	}
	if (StringUtils.isBlank(pidValue)) {
		return false;
	}
	if (StringUtils.isBlank(pidValue.replaceAll("(?:\\n|\\r|\\t|\\s)", ""))) {
		return false;
	}
	if (CleaningFunctions.PID_BLACKLIST.contains(pidValue)) {
		return false;
	}
	final String pidType = s.getQualifier().getClassid();
	return !PidBlacklistProvider.getBlacklist(pidType).contains(pidValue);
}
/**
 * Utility method that normalises PID values on a per-type basis.
 * The given PID is updated in place, using the class id of its qualifier as PID type.
 *
 * @param pid the PID whose value will be normalised.
 * @return the same PID instance, containing the normalised value.
 */
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
	final String pidType = pid.getQualifier().getClassid();
	final String normalized = normalizePidValue(pidType, pid.getValue());
	pid.setValue(normalized);
	return pid;
}
/**
 * This utility was moved from DOIBoost, it implements a better cleaning of DOI:
 * whitespace is removed, the value is lower-cased and everything preceding the first
 * occurrence of "10." is dropped.
 *
 * @param input DOI
 * @return normalized DOI
 * @throws IllegalArgumentException when the input is null, when it is empty once cleaned,
 *         or when it does not contain the "10." prefix
 */
private static String normalizeDOI(final String input) {
	if (input == null)
		throw new IllegalArgumentException("PID value cannot be empty");
	// Locale.ROOT: with the default locale a Turkish JVM would map 'I' to a dotless 'ı',
	// producing a different DOI.
	final String replaced = input
		.replaceAll(ALL_SPACES_REGEX, "")
		.toLowerCase(java.util.Locale.ROOT)
		.replaceFirst(DOI_PREFIX_REGEX, DOI_PREFIX);
	if (StringUtils.isEmpty(replaced.trim()))
		throw new IllegalArgumentException("PID value normalized return empty string");
	final int prefixStart = replaced.indexOf(DOI_PREFIX);
	if (prefixStart < 0)
		throw new IllegalArgumentException("DOI Must starts with 10.");
	return replaced.substring(prefixStart);
}
/**
 * Normalises a PID value according to its type: the value is always trimmed, DOIs are
 * additionally cleaned by normalizeDOI.
 *
 * @param pidType the PID type; a null or unsupported type only gets the value trimmed
 * @param pidValue the PID value
 * @return the normalised value
 * @throws IllegalArgumentException when the value is null, or when a DOI cannot be normalised
 */
public static String normalizePidValue(String pidType, String pidValue) {
	final String value = Optional
		.ofNullable(pidValue)
		.map(String::trim)
		.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));

	// TODO add cleaning for more PID types as needed
	// equals() on the constant is null-safe, whereas a switch on a null pidType raises a NullPointerException
	if (ModelConstants.DOI.equals(pidType)) {
		// Locale.ROOT keeps the lower-casing independent from the default locale of the JVM
		return normalizeDOI(value.toLowerCase(java.util.Locale.ROOT));
	}
	return value;
}
}

View File

@ -16,14 +16,16 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.sisyphsu.dateparser.DateParserUtils; import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import me.xuender.unidecode.Unidecode; import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions { public class GraphCleaningFunctions extends CleaningFunctions {
@ -38,6 +40,127 @@ public class GraphCleaningFunctions extends CleaningFunctions {
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5; public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
/**
 * Removes from a result the contexts whose lower-cased id starts with the given context id,
 * provided that shouldCleanContext accepts the result. Objects that are not results are
 * returned untouched.
 *
 * @param value the object to clean
 * @param contextId the prefix identifying the contexts to remove
 * @param verifyParam the parameter forwarded to shouldCleanContext
 * @param <T> the model type
 * @return the given object, possibly with a filtered list of contexts
 */
public static <T extends Oaf> T cleanContext(T value, String contextId, String verifyParam) {
	if (!ModelSupport.isSubClass(value, Result.class)) {
		return value;
	}
	final Result result = (Result) value;
	if (shouldCleanContext(result, verifyParam)) {
		result
			.setContext(
				result
					.getContext()
					.stream()
					.filter(ctx -> !StringUtils.startsWith(ctx.getId().toLowerCase(), contextId))
					.collect(Collectors.toList()));
	}
	return (T) result;
}
/**
 * Tells whether the contexts of a result have to be cleaned: the result must have a main title
 * starting (ignoring case) with the given parameter, and a non-null list of contexts.
 *
 * @param res the result to inspect
 * @param verifyParam the prefix the main title is compared with
 * @return true when both conditions hold
 */
private static boolean shouldCleanContext(Result res, String verifyParam) {
	final String mainTitleClassid = ModelConstants.MAIN_TITLE_QUALIFIER.getClassid();
	final boolean titleMatch = res
		.getTitle()
		.stream()
		.anyMatch(
			title -> title.getQualifier().getClassid().equalsIgnoreCase(mainTitleClassid)
				&& title.getValue().toLowerCase().startsWith(verifyParam.toLowerCase()));

	if (!titleMatch) {
		return false;
	}
	return Objects.nonNull(res.getContext());
}
/**
 * Removes from a result the countries rejected by toTakeCountry, when the result carries a DOI
 * (among its pids and alternate identifiers) starting with one of the given prefixes.
 * <p>
 * The result is left untouched when one of its instances is hosted by a datasource listed in
 * hostedBy, or when none of its collectedfrom values equals the given one. Objects that are not
 * results are returned untouched.
 *
 * @param value the object to clean
 * @param verifyParam the DOI prefixes
 * @param hostedBy the keys of the hosting datasources excluded from the cleaning
 * @param collectedfrom the collectedfrom value the result must have
 * @param country the country forwarded to toTakeCountry
 * @param <T> the model type
 * @return the given object, possibly with a filtered list of countries
 */
public static <T extends Oaf> T cleanCountry(T value, String[] verifyParam, Set<String> hostedBy,
	String collectedfrom, String country) {
	if (!ModelSupport.isSubClass(value, Result.class)) {
		return value;
	}
	final Result res = (Result) value;

	final boolean hostedByListed = res
		.getInstance()
		.stream()
		.anyMatch(i -> hostedBy.contains(i.getHostedby().getKey()));
	if (hostedByListed
		|| res.getCollectedfrom().stream().noneMatch(cf -> cf.getValue().equals(collectedfrom))) {
		return (T) res;
	}

	final List<StructuredProperty> ids = getPidsAndAltIds(res).collect(Collectors.toList());
	final boolean hasMatchingDoi = ids
		.stream()
		.anyMatch(
			p -> p.getQualifier().getClassid().equals(PidType.doi.toString())
				&& pidInParam(p.getValue(), verifyParam));
	if (hasMatchingDoi) {
		res
			.setCountry(
				res
					.getCountry()
					.stream()
					.filter(c -> toTakeCountry(c, country))
					.collect(Collectors.toList()));
	}
	return (T) res;
}
/**
 * Streams, in this order, the pids of the result, the pids of its instances and the alternate
 * identifiers of its instances. Null lists are treated as empty.
 *
 * @param r the result
 * @param <T> the result type
 * @return the concatenation of the three streams
 */
private static <T extends Result> Stream<StructuredProperty> getPidsAndAltIds(T r) {
	final Stream<StructuredProperty> resultPids = nullSafeStream(r.getPid());

	final Stream<StructuredProperty> instancePids = Optional
		.ofNullable(r.getInstance())
		.map(Collection::stream)
		.orElse(Stream.empty())
		.flatMap(i -> nullSafeStream(i.getPid()));

	final Stream<StructuredProperty> instanceAltIds = Optional
		.ofNullable(r.getInstance())
		.map(Collection::stream)
		.orElse(Stream.empty())
		.flatMap(i -> nullSafeStream(i.getAlternateIdentifier()));

	final Stream<StructuredProperty> allPids = Stream.concat(resultPids, instancePids);
	return Stream.concat(allPids, instanceAltIds);
}

/** Streams the given collection, or returns an empty stream when the collection is null. */
private static Stream<StructuredProperty> nullSafeStream(Collection<StructuredProperty> values) {
	return values == null ? Stream.empty() : values.stream();
}
/**
 * Tells whether the given value starts with at least one of the given prefixes.
 *
 * @param value the value to check
 * @param verifyParam the candidate prefixes
 * @return true when a prefix matches, false otherwise (also when there are no prefixes)
 */
private static boolean pidInParam(String value, String[] verifyParam) {
	return Stream.of(verifyParam).anyMatch(prefix -> value.startsWith(prefix));
}
/**
 * Tells whether a country has to be kept. A country is dropped only when its class id equals
 * (ignoring case) the given country and its inference provenance is "propagation".
 *
 * @param c the country to evaluate
 * @param country the class id of the country to remove
 * @return true when the country has to be kept
 */
private static boolean toTakeCountry(Country c, String country) {
	// Without dataInfo, or without dataInfo.inferenceprovenance, the country cannot have been
	// inserted via propagation
	if (c.getDataInfo() == null) {
		return true;
	}
	if (c.getDataInfo().getInferenceprovenance() == null) {
		return true;
	}
	return !c.getClassid().equalsIgnoreCase(country)
		|| !c.getDataInfo().getInferenceprovenance().equals("propagation");
}
public static <T extends Oaf> T fixVocabularyNames(T value) { public static <T extends Oaf> T fixVocabularyNames(T value) {
if (value instanceof Datasource) { if (value instanceof Datasource) {
// nothing to clean here // nothing to clean here
@ -91,48 +214,31 @@ public class GraphCleaningFunctions extends CleaningFunctions {
} }
public static <T extends Oaf> boolean filter(T value) { public static <T extends Oaf> boolean filter(T value) {
if (Boolean.TRUE if (value instanceof Entity) {
.equals( Entity entity = (Entity) value;
Optional if (Boolean.TRUE
.ofNullable(value) .equals(
.map( Optional
o -> Optional .ofNullable(entity)
.ofNullable(o.getDataInfo()) .map(
.map( o -> Optional
d -> Optional .ofNullable(o.getDataInfo())
.ofNullable(d.getInvisible()) .map(
.orElse(true)) d -> Optional
.orElse(true)) .ofNullable(d.getInvisible())
.orElse(true))) { .orElse(true))
return true; .orElse(true))
} .orElse(true))) {
return true;
if (value instanceof Datasource) { } else if (value instanceof Result) {
// nothing to evaluate here Result r = (Result) value;
} else if (value instanceof Project) {
// nothing to evaluate here
} else if (value instanceof Organization) {
// nothing to evaluate here
} else if (value instanceof Relation) {
// nothing to clean here
} else if (value instanceof Result) {
Result r = (Result) value;
if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) {
return false;
}
if (value instanceof Publication) {
} else if (value instanceof Dataset) {
} else if (value instanceof OtherResearchProduct) {
} else if (value instanceof Software) {
if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) {
return false;
}
} }
} }
return true; return true;
} }
@ -164,7 +270,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.nonNull(r.getDateofacceptance())) { if (Objects.nonNull(r.getDateofacceptance())) {
Optional<String> date = cleanDateField(r.getDateofacceptance()); Optional<String> date = cleanDateField(r.getDateofacceptance());
if (date.isPresent()) { if (date.isPresent()) {
r.getDateofacceptance().setValue(date.get()); r.setDateofacceptance(date.get());
} else { } else {
r.setDateofacceptance(null); r.setDateofacceptance(null);
} }
@ -185,7 +291,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) { if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getName())) {
r.setPublisher(null); r.setPublisher(null);
} }
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) { if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
@ -267,7 +373,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.getDescription() .getDescription()
.stream() .stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter(s -> StringUtils.isNotBlank(s))
.map(GraphCleaningFunctions::cleanValue) .map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
@ -288,29 +394,25 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.setInstancetype( .setInstancetype(
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
"0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE, "0038", "Other literature type", ModelConstants.DNET_PUBLICATION_RESOURCE));
ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Dataset) { } else if (r instanceof Dataset) {
i i
.setInstancetype( .setInstancetype(
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
"0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE, "0039", "Other dataset type", ModelConstants.DNET_PUBLICATION_RESOURCE));
ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof Software) { } else if (r instanceof Software) {
i i
.setInstancetype( .setInstancetype(
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
"0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE, "0040", "Other software type", ModelConstants.DNET_PUBLICATION_RESOURCE));
ModelConstants.DNET_PUBLICATION_RESOURCE));
} else if (r instanceof OtherResearchProduct) { } else if (r instanceof OtherResearchProduct) {
i i
.setInstancetype( .setInstancetype(
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
"0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE, "0020", "Other ORP type", ModelConstants.DNET_PUBLICATION_RESOURCE));
ModelConstants.DNET_PUBLICATION_RESOURCE));
} }
} }
@ -348,7 +450,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (Objects.nonNull(i.getDateofacceptance())) { if (Objects.nonNull(i.getDateofacceptance())) {
Optional<String> date = cleanDateField(i.getDateofacceptance()); Optional<String> date = cleanDateField(i.getDateofacceptance());
if (date.isPresent()) { if (date.isPresent()) {
i.getDateofacceptance().setValue(date.get()); i.setDateofacceptance(date.get());
} else { } else {
i.setDateofacceptance(null); i.setDateofacceptance(null);
} }
@ -456,10 +558,9 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return value; return value;
} }
private static Optional<String> cleanDateField(Field<String> dateofacceptance) { private static Optional<String> cleanDateField(String dateofacceptance) {
return Optional return Optional
.ofNullable(dateofacceptance) .ofNullable(dateofacceptance)
.map(Field::getValue)
.map(GraphCleaningFunctions::cleanDate) .map(GraphCleaningFunctions::cleanDate)
.filter(Objects::nonNull); .filter(Objects::nonNull);
} }
@ -513,20 +614,17 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static void fixVocabName(Qualifier q, String vocabularyName) { private static void fixVocabName(Qualifier q, String vocabularyName) {
if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) { if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
q.setSchemeid(vocabularyName); q.setSchemeid(vocabularyName);
q.setSchemename(vocabularyName);
} }
} }
private static AccessRight accessRight(String classid, String classname, String scheme) { private static AccessRight accessRight(String classid, String classname, String scheme) {
return OafMapperUtils return OafMapperUtils
.accessRight( .accessRight(
classid, classname, scheme, scheme); classid, classname, scheme);
} }
private static Qualifier qualifier(String classid, String classname, String scheme) { private static Qualifier qualifier(String classid, String classname, String scheme) {
return OafMapperUtils return OafMapperUtils.qualifier(classid, classname, scheme);
.qualifier(
classid, classname, scheme, scheme);
} }
protected static StructuredProperty cleanValue(StructuredProperty s) { protected static StructuredProperty cleanValue(StructuredProperty s) {
@ -539,9 +637,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
return s; return s;
} }
protected static Field<String> cleanValue(Field<String> s) { protected static String cleanValue(String s) {
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " ")); return s.replaceAll(CLEANING_REGEX, " ");
return s;
} }
} }

View File

@ -0,0 +1,317 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
/**
* Factory class for OpenAIRE identifiers in the Graph
*/
public class IdentifierFactory implements Serializable {
/** The "::" separator. NOTE(review): presumably placed between the prefix and the rest of an identifier - confirm against the usages. */
public static final String ID_SEPARATOR = "::";
/** The "|" separator. NOTE(review): presumably placed after the entity type prefix of an identifier - confirm against the usages. */
public static final String ID_PREFIX_SEPARATOR = "|";
/** Length of the identifier prefix. NOTE(review): the usages are not visible here - confirm what the 12 characters include. */
public static final int ID_PREFIX_LEN = 12;
/**
 * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] considered authoritative for that PID_TYPE.
 * The id of the record (source_::id) will be rewritten as (pidType_::id).
 * Authorities are registered for the doi, pmc, pmid, arXiv and w3id PID types.
 */
public static final Map<PidType, HashBiMap<String, String>> PID_AUTHORITY = Maps.newHashMap();
static {
PID_AUTHORITY.put(PidType.doi, HashBiMap.create());
PID_AUTHORITY.get(PidType.doi).put(CROSSREF_ID, "Crossref");
PID_AUTHORITY.get(PidType.doi).put(DATACITE_ID, "Datacite");
PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "ZENODO");
PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "Zenodo");
PID_AUTHORITY.put(PidType.pmc, HashBiMap.create());
PID_AUTHORITY.get(PidType.pmc).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
PID_AUTHORITY.get(PidType.pmc).put(PUBMED_CENTRAL_ID, "PubMed Central");
PID_AUTHORITY.put(PidType.pmid, HashBiMap.create());
PID_AUTHORITY.get(PidType.pmid).put(EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central");
PID_AUTHORITY.get(PidType.pmid).put(PUBMED_CENTRAL_ID, "PubMed Central");
PID_AUTHORITY.put(PidType.arXiv, HashBiMap.create());
PID_AUTHORITY.get(PidType.arXiv).put(ARXIV_ID, "arXiv.org e-Print Archive");
PID_AUTHORITY.put(PidType.w3id, HashBiMap.create());
PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ROHub");
}
/**
 * Declares the associations PID_TYPE -> [DATASOURCE ID, PID SUBSTRING] considered as delegated authority for that
 * PID_TYPE. For example, Zenodo is delegated to forge DOIs that contain the 'zenodo' word.
 *
 * If a record with the same id (same pid) comes from 2 data sources, the one coming from a delegated source wins.
 * E.g. Zenodo records win over those from Datacite.
 * See also https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/187 and the class dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java
 */
public static final Map<PidType, Map<String, String>> DELEGATED_PID_AUTHORITY = Maps.newHashMap();
static {
DELEGATED_PID_AUTHORITY.put(PidType.doi, new HashMap<>());
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_OD_ID, "zenodo");
DELEGATED_PID_AUTHORITY.get(PidType.doi).put(ZENODO_R3_ID, "zenodo");
DELEGATED_PID_AUTHORITY.put(PidType.w3id, new HashMap<>());
DELEGATED_PID_AUTHORITY.get(PidType.w3id).put(ROHUB_ID, "ro-id");
}
/**
 * Declares the associations PID_TYPE -> [DATASOURCE ID, NAME] whose records are considered enrichment for the graph.
 * Their OpenAIRE ID is built from the declared PID type. They are merged with their corresponding record,
 * identified by the same OpenAIRE id.
 */
public static final Map<PidType, HashBiMap<String, String>> ENRICHMENT_PROVIDER = Maps.newHashMap();
static {
ENRICHMENT_PROVIDER.put(PidType.doi, HashBiMap.create());
ENRICHMENT_PROVIDER.get(PidType.doi).put(OPEN_APC_ID, OPEN_APC_NAME);
}
/**
 * Collects the identifiers of all the datasources registered in DELEGATED_PID_AUTHORITY,
 * whatever the PID type.
 *
 * @return a new HashSet holding the datasource identifiers
 */
public static Set<String> delegatedAuthorityDatasourceIds() {
	final Set<String> datasourceIds = new HashSet<>();
	for (final Map<String, String> delegates : DELEGATED_PID_AUTHORITY.values()) {
		datasourceIds.addAll(delegates.keySet());
	}
	return datasourceIds;
}
/**
 * Lists the distinct PIDs obtained from pidFromInstance for the given PIDs and datasource.
 *
 * @param pid the candidate PIDs
 * @param collectedFrom the datasource the PIDs were collected from
 * @return the distinct PIDs, in encounter order
 */
public static List<StructuredProperty> getPids(List<StructuredProperty> pid, KeyValue collectedFrom) {
	final List<StructuredProperty> distinctPids = pidFromInstance(pid, collectedFrom, true)
		.distinct()
		.collect(Collectors.toList());
	return distinctPids;
}
/**
 * Builds an md5-based identifier from a PID of the given result.
 * When the result declares a pid list, only its DOIs passing {@code CleaningFunctions.pidFilter} are considered;
 * otherwise the PIDs declared by its instances (of any type) passing the same filter are considered.
 *
 * @param entity the result providing the PIDs, may be null
 * @param <T> the specific result type
 * @return the identifier built from the selected PID, null when the entity is null or no PID is selected
 */
public static <T extends Result> String createDOIBoostIdentifier(T entity) {
    if (entity == null) {
        return null;
    }

    final Optional<StructuredProperty> selected;
    if (entity.getPid() != null) {
        selected = entity
            .getPid()
            .stream()
            .filter(Objects::nonNull)
            .filter(s -> s.getQualifier() != null && "doi".equalsIgnoreCase(s.getQualifier().getClassid()))
            .filter(CleaningFunctions::pidFilter)
            .findAny();
    } else if (entity.getInstance() != null) {
        selected = entity
            .getInstance()
            .stream()
            .filter(i -> i.getPid() != null)
            .flatMap(i -> i.getPid().stream())
            .filter(CleaningFunctions::pidFilter)
            .findAny();
    } else {
        selected = Optional.empty();
    }

    return selected.map(p -> idFromPid(entity, p, true)).orElse(null);
}
/**
 * Creates an identifier from the most relevant PID (if available) provided by a known PID authority in the given
 * entity T. Returns entity.id when none of the PIDs meets the selection criteria.
 *
 * @param entity the entity providing PIDs and a default ID.
 * @param <T> the specific entity type. Currently Organization and Result subclasses are supported.
 * @param md5 indicates whether the PID value should be hashed or not.
 * @return an identifier from the most relevant PID, entity.id otherwise
 */
public static <T extends Entity> String createIdentifier(T entity, boolean md5) {
    checkArgument(StringUtils.isNoneBlank(entity.getId()), "missing entity identifier");

    final Map<String, Set<StructuredProperty>> pids = extractPids(entity);

    // step 1: pick the most relevant PID across all the PID types
    final Optional<StructuredProperty> mostRelevant = pids
        .values()
        .stream()
        .flatMap(Set::stream)
        .min(new PidComparator<>(entity));
    if (!mostRelevant.isPresent()) {
        return entity.getId();
    }

    // step 2: among the PIDs of that same type, pick the first one according to the PidValueComparator
    final Set<StructuredProperty> sameType = pids.get(mostRelevant.get().getQualifier().getClassid());
    if (sameType == null) {
        return entity.getId();
    }
    return sameType
        .stream()
        .sorted(new PidValueComparator())
        .findFirst()
        .map(s -> idFromPid(entity, s, md5))
        .orElseGet(entity::getId);
}
/**
 * Extracts the PIDs of the given entity, grouped by PID type (qualifier classid).
 * For Result entities the PIDs are taken from the instances (see {@code mapPids}); for any other entity they are
 * taken from the entity pid list, normalized and filtered with {@code CleaningFunctions.pidFilter}.
 *
 * @param entity the entity providing the PIDs
 * @param <T> the specific entity type
 * @return the PIDs grouped by type; an empty map when the entity declares no instances / no PIDs
 */
private static <T extends Entity> Map<String, Set<StructuredProperty>> extractPids(T entity) {
    if (entity instanceof Result) {
        return Optional
            .ofNullable(((Result) entity).getInstance())
            .map(IdentifierFactory::mapPids)
            .orElseGet(HashMap::new);
    }

    // a missing pid list must not break the identifier creation: callers fall back on entity.id
    final List<StructuredProperty> pids = entity.getPid();
    if (pids == null) {
        return new HashMap<>();
    }
    return pids
        .stream()
        .filter(Objects::nonNull)
        .map(CleaningFunctions::normalizePidValue)
        .filter(CleaningFunctions::pidFilter)
        .collect(
            Collectors
                .groupingBy(
                    p -> p.getQualifier().getClassid(),
                    Collectors.mapping(p -> p, Collectors.toCollection(HashSet::new))));
}
/**
 * Groups by PID type (qualifier classid) the PIDs selected from the given instances.
 * The selection is delegated to {@code pidFromInstance}, with handle mapping disabled.
 *
 * @param instance the instances providing the PIDs
 * @return the selected PIDs grouped by type
 */
private static Map<String, Set<StructuredProperty>> mapPids(List<Instance> instance) {
    return instance
        .stream()
        .flatMap(i -> pidFromInstance(i.getPid(), i.getCollectedfrom(), false))
        .collect(
            Collectors
                .groupingBy(
                    pid -> pid.getQualifier().getClassid(),
                    Collectors.mapping(Function.identity(), Collectors.toCollection(HashSet::new))));
}
/**
 * Selects, among the given PIDs, those that can be used to build an identifier.
 *
 * @param pid the candidate PIDs, may be null
 * @param collectedFrom the datasource the PIDs were collected from
 * @param mapHandles when true, PIDs of type handle are retained regardless of the providing datasource
 * @return the stream of normalized PIDs that passed all the filters, empty when pid is null
 */
private static Stream<StructuredProperty> pidFromInstance(List<StructuredProperty> pid, KeyValue collectedFrom,
    boolean mapHandles) {
    if (pid == null) {
        return Stream.empty();
    }
    return pid
        .stream()
        // filter away PIDs provided by a DS that is not considered an authority for the given PID Type
        .filter(p -> shouldFilterPidByCriteria(collectedFrom, p, mapHandles))
        .map(CleaningFunctions::normalizePidValue)
        // a delegated authority is trusted only for the PIDs matching its declared substring
        .filter(p -> isNotFromDelegatedAuthority(collectedFrom, p))
        .filter(CleaningFunctions::pidFilter);
}
/**
 * Tells whether the given PID must be retained. Despite the name, returning true means that the PID is kept.
 * A PID is retained when the providing datasource is an enrichment provider or a PID authority for the PID type
 * (matched either by datasource id or by datasource name), or when handles are mapped and the PID is a handle.
 *
 * @param collectedFrom the datasource providing the PID; when null the PID is discarded
 * @param p the PID to evaluate
 * @param mapHandles whether PIDs of type handle are always retained
 * @return true when the PID must be retained
 */
private static boolean shouldFilterPidByCriteria(KeyValue collectedFrom, StructuredProperty p, boolean mapHandles) {
    final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());

    if (collectedFrom == null) {
        return false;
    }

    final Map<String, String> enrichers = ENRICHMENT_PROVIDER.get(pType);
    final boolean isEnrich = enrichers != null
        && (enrichers.containsKey(collectedFrom.getKey()) || enrichers.containsValue(collectedFrom.getValue()));

    final Map<String, String> authorities = PID_AUTHORITY.get(pType);
    final boolean isAuthority = authorities != null
        && (authorities.containsKey(collectedFrom.getKey())
            || authorities.containsValue(collectedFrom.getValue()));

    final boolean isMappedHandle = mapHandles && pType.equals(PidType.handle);
    return isMappedHandle || isEnrich || isAuthority;
}
/**
 * Checks the given PID against the delegated authorities declared for its type.
 *
 * @param collectedFrom the datasource providing the PID
 * @param p the PID to evaluate
 * @return true when no delegation is declared for the PID type or for the datasource; otherwise true only if
 *         the PID value contains the substring associated to the delegated datasource
 */
private static boolean isNotFromDelegatedAuthority(KeyValue collectedFrom, StructuredProperty p) {
    final PidType pType = PidType.tryValueOf(p.getQualifier().getClassid());
    final Map<String, String> delegates = DELEGATED_PID_AUTHORITY.get(pType);

    if (delegates == null || !delegates.containsKey(collectedFrom.getKey())) {
        return true;
    }

    final String expectedSubstring = delegates.get(collectedFrom.getKey());
    return StringUtils.contains(p.getValue(), expectedSubstring);
}
/**
 * Creates an identifier for the given entity, hashing the value of the selected PID.
 *
 * @see IdentifierFactory#createIdentifier(Entity, boolean)
 */
public static <T extends Entity> String createIdentifier(T entity) {
    final boolean md5 = true;
    return createIdentifier(entity, md5);
}
/**
 * Builds an identifier for the given entity from the given PID, using the id prefix associated to the entity class.
 *
 * @param entity the entity the identifier is created for
 * @param s the PID providing type and value
 * @param md5 whether the PID value must be hashed
 * @return the identifier
 */
private static <T extends Entity> String idFromPid(T entity, StructuredProperty s, boolean md5) {
    final String numericPrefix = ModelSupport.getIdPrefix(entity.getClass());
    final String pidType = s.getQualifier().getClassid();
    return idFromPid(numericPrefix, pidType, s.getValue(), md5);
}
/**
 * Assembles an identifier as: numericPrefix, ID_PREFIX_SEPARATOR, fixed-length prefix derived from the PID type,
 * ID_SEPARATOR, and the PID value (or its md5).
 *
 * @param numericPrefix the prefix identifying the entity type
 * @param pidType the PID type, used to build the fixed-length prefix
 * @param pidValue the PID value
 * @param md5 whether the PID value must be hashed
 * @return the identifier
 */
public static String idFromPid(String numericPrefix, String pidType, String pidValue, boolean md5) {
    return numericPrefix
        + ID_PREFIX_SEPARATOR
        + createPrefix(pidType)
        + ID_SEPARATOR
        + (md5 ? ModelSupport.md5(pidValue) : pidValue);
}
// builds a prefix of exactly ID_PREFIX_LEN characters from the PID type:
// longer types are truncated, shorter ones are right-padded with '_'
private static String createPrefix(String pidType) {
    final StringBuilder padded = new StringBuilder(StringUtils.left(pidType, ID_PREFIX_LEN));
    for (int missing = ID_PREFIX_LEN - padded.length(); missing > 0; missing--) {
        padded.append("_");
    }
    return padded.substring(0, ID_PREFIX_LEN);
}
/**
 * Creates an OpenAIRE identifier in the form {@code prefix|originalId}.
 * When hashing is requested, the original id is split on the first "::" and only the part following it is
 * replaced by its md5, giving {@code prefix|nsPrefix::md5(rest)}.
 *
 * @param prefix the numeric prefix identifying the entity type
 * @param originalId the original identifier
 * @param to_md5 whether the part following the namespace prefix must be hashed
 * @return the OpenAIRE identifier, null when the original id is blank
 */
public static String createOpenaireId(
    final int prefix,
    final String originalId,
    final boolean to_md5) {
    if (StringUtils.isBlank(originalId)) {
        return null;
    }
    if (!to_md5) {
        return String.format("%s|%s", prefix, originalId);
    }
    final String nsPrefix = StringUtils.substringBefore(originalId, "::");
    final String rest = StringUtils.substringAfter(originalId, "::");
    return String.format("%s|%s::%s", prefix, nsPrefix, ModelSupport.md5(rest));
}
/**
 * Creates an OpenAIRE identifier choosing the numeric prefix from the entity type name:
 * datasource = 10, organization = 20, person = 30, project = 40, anything else = 50.
 *
 * @param type the entity type name
 * @param originalId the original identifier
 * @param to_md5 whether the part following the namespace prefix must be hashed
 * @return the OpenAIRE identifier, null when the original id is blank
 */
public static String createOpenaireId(
    final String type,
    final String originalId,
    final boolean to_md5) {
    final int prefix;
    switch (type) {
        case "datasource":
            prefix = 10;
            break;
        case "organization":
            prefix = 20;
            break;
        case "person":
            prefix = 30;
            break;
        case "project":
            prefix = 40;
            break;
        default:
            prefix = 50;
            break;
    }
    return createOpenaireId(prefix, originalId, to_md5);
}
}

View File

@ -0,0 +1,104 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.beanutils.BeanUtilsBean;
public class MergeBeanUtils {

    /**
     * Copies all properties from sources to destination. Null values are not copied, and nested objects are
     * either cloned into the destination (when the destination property is null) or recursively merged into the
     * existing destination object. The property named "resulttype" is never copied.
     *
     * @param dest object to copy props into (will be mutated)
     * @param sources the objects providing the values, applied in the given order
     * @param <T> the type of dest and sources
     * @return the (mutated) dest object
     * @throws RuntimeException when a property cannot be accessed, copied or cloned
     */
    @SafeVarargs
    public static <T> T mergeIn(T dest, T... sources) {
        // to keep from any chance of infinite recursion, limit each object to 1 instance at a time in the stack.
        // Objects are tracked by identity (==), not by equals().
        final List<Object> lookingAt = new ArrayList<>();

        BeanUtilsBean recursiveBeanUtils = new BeanUtilsBean() {

            /**
             * Check if the class name is an internal (JDK / vendor) one
             *
             * @param name the fully qualified class name
             * @return true for internal classes
             */
            private boolean isInternal(String name) {
                return name.startsWith("java.") || name.startsWith("javax.")
                    || name.startsWith("com.sun.") || name.startsWith("oracle.");
            }

            /**
             * Override to ensure that we don't end up in infinite recursion
             *
             * @param dest the object to copy into
             * @param orig the object to copy from
             * @throws IllegalAccessException
             * @throws InvocationTargetException
             */
            @Override
            public void copyProperties(Object dest, Object orig)
                throws IllegalAccessException, InvocationTargetException {
                // if the object is already in the list we hit some sort of recursion, stop here.
                // The check is done outside of the try block: the frame that detects the recursion must not
                // remove the entry registered by the outer frame.
                if (lookingAt.stream().anyMatch(o -> o == dest)) {
                    return; // recursion detected
                }
                lookingAt.add(dest);
                try {
                    super.copyProperties(dest, orig);
                } finally {
                    // remove by identity, consistently with the check above
                    lookingAt.removeIf(o -> o == dest);
                }
            }

            @Override
            public void copyProperty(Object dest, String name, Object value)
                throws IllegalAccessException, InvocationTargetException {
                if ("resulttype".equals(name)) {
                    return;
                }
                if (value == null) {
                    // don't copy over null values
                    return;
                }
                // attempt to check if the value is a pojo we can clone using nested calls
                if (!value.getClass().isPrimitive() && !value.getClass().isSynthetic()
                    && !isInternal(value.getClass().getName())) {
                    try {
                        Object prop = super.getPropertyUtils().getProperty(dest, name);
                        // get current value, if it's null then clone the value and set that to the value
                        if (prop == null) {
                            super.setProperty(dest, name, super.cloneBean(value));
                        } else {
                            // get the destination value and then recursively call
                            copyProperties(prop, value);
                        }
                    } catch (NoSuchMethodException e) {
                        // best effort: the property has no accessor on the destination, skip it
                        return;
                    } catch (InstantiationException e) {
                        throw new RuntimeException("Nested property could not be cloned.", e);
                    }
                } else {
                    super.copyProperty(dest, name, value);
                }
            }
        };

        for (Object source : sources) {
            try {
                recursiveBeanUtils.copyProperties(dest, source);
            } catch (IllegalAccessException | InvocationTargetException e) {
                throw new RuntimeException(e);
            }
        }
        return dest;
    }
}

View File

@ -0,0 +1,903 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static com.google.common.base.Preconditions.checkArgument;
import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.isSubClass;
import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.sameClass;
import java.text.ParseException;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class MergeUtils {
/**
 * Merges right into left without considering the delegated authorities.
 *
 * @see MergeUtils#merge(Oaf, Oaf, boolean)
 */
public static <T extends Oaf> T merge(final T left, final T right) {
    final boolean checkDelegatedAuthority = false;
    return merge(left, right, checkDelegatedAuthority);
}
/**
 * Merges two Oaf objects, dispatching on their kind: entities are merged by {@code mergeEntities},
 * relations by {@code mergeRelation}.
 *
 * @param left the first object
 * @param right the second object
 * @param checkDelegatedAuthority forwarded to the entity merge
 * @return the merged object
 * @throws RuntimeException when the two objects are neither both entities nor both relations
 */
public static <T extends Oaf> T merge(final T left, final T right, boolean checkDelegatedAuthority) {
    if (sameClass(left, right, Entity.class)) {
        return mergeEntities(left, right, checkDelegatedAuthority);
    }
    if (sameClass(left, right, Relation.class)) {
        return mergeRelation(left, right);
    }
    final String message = String
        .format(
            "MERGE_FROM_AND_GET incompatible types: %s, %s",
            left.getClass().getCanonicalName(), right.getClass().getCanonicalName());
    throw new RuntimeException(message);
}
/**
 * Merges two entities, dispatching on the entity type.
 * Results of different classes, or any pair of results when checkDelegatedAuthority is set, go through
 * {@code mergeResultsOfDifferentTypes}; datasources are not merged yet (left is returned as is).
 *
 * @param left the first entity
 * @param right the second entity
 * @param checkDelegatedAuthority whether delegated authorities must be considered when merging results
 * @return the merged entity
 * @throws RuntimeException when the two entities are not of a compatible, supported type
 */
private static <T extends Oaf> T mergeEntities(T left, T right, boolean checkDelegatedAuthority) {
    if (sameClass(left, right, Result.class)) {
        final boolean differentTypes = !left.getClass().equals(right.getClass());
        return differentTypes || checkDelegatedAuthority
            ? mergeResultsOfDifferentTypes(left, right)
            : mergeResult(left, right);
    }
    if (sameClass(left, right, Datasource.class)) {
        // TODO
        return left;
    }
    if (sameClass(left, right, Organization.class)) {
        return mergeOrganization(left, right);
    }
    if (sameClass(left, right, Project.class)) {
        return mergeProject(left, right);
    }
    final String message = String
        .format(
            "MERGE_FROM_AND_GET incompatible types: %s, %s",
            left.getClass().getCanonicalName(), right.getClass().getCanonicalName());
    throw new RuntimeException(message);
}
/**
 * This method is used in the global result grouping phase. When exactly one of the two results comes from a
 * delegated authority (https://graph.openaire.eu/docs/data-model/pids-and-identifiers#delegated-authorities)
 * that version is returned as is.
 *
 * Otherwise, the resulttype priority order implemented in {@link ResultTypeComparator} decides which of the two
 * is used as the base of the canonical property merging.
 *
 * @param left the first result
 * @param right the second result
 * @return the preferred result, or the outcome of the property merging
 */
private static <T extends Oaf> T mergeResultsOfDifferentTypes(T left, T right) {
    final boolean leftDelegated = isFromDelegatedAuthority((Result) left);
    final boolean rightDelegated = isFromDelegatedAuthority((Result) right);

    if (leftDelegated != rightDelegated) {
        // exactly one of the two comes from a delegated authority: it wins
        return leftDelegated ? left : right;
    }

    final int typeOrder = new ResultTypeComparator().compare((Result) left, (Result) right);
    return typeOrder < 0 ? mergeResult(left, right) : mergeResult(right, left);
}
/**
 * Internal utility that merges the fields common to all the entities. The left entity is mutated and returned.
 * List fields are merged; the most recent lastupdatetimestamp is kept; dateofcollection and
 * dateoftransformation are taken from right when compareTrust(left, right) is negative.
 *
 * @param left the entity to merge into
 * @param right the entity providing the additional values
 * @return the left entity, updated
 * @param <T> the specific entity type
 */
private static <T extends Oaf> T mergeEntityFields(T left, T right) {
    final Entity target = (Entity) left;
    final Entity other = (Entity) right;

    target.setOriginalId(mergeLists(target.getOriginalId(), other.getOriginalId()));
    target.setCollectedfrom(mergeLists(target.getCollectedfrom(), other.getCollectedfrom()));

    // keep the most recent timestamp among the available ones
    final Long otherTimestamp = other.getLastupdatetimestamp();
    if (otherTimestamp != null) {
        final Long currentTimestamp = target.getLastupdatetimestamp();
        target
            .setLastupdatetimestamp(
                currentTimestamp == null ? otherTimestamp : Long.max(currentTimestamp, otherTimestamp));
    }

    target.setPid(mergeLists(target.getPid(), other.getPid()));

    final boolean preferOther = compareTrust(target, other) < 0;
    if (preferOther) {
        if (other.getDateofcollection() != null) {
            target.setDateofcollection(other.getDateofcollection());
        }
        if (other.getDateoftransformation() != null) {
            target.setDateoftransformation(other.getDateoftransformation());
        }
    }

    target.setMeasures(mergeLists(target.getMeasures(), other.getMeasures()));
    target.setExtraInfo(mergeLists(target.getExtraInfo(), other.getExtraInfo()));

    return (T) target;
}
/**
 * Merges two relations having the same source, target and semantics. The left relation is mutated and returned:
 * provenances are merged, the validated flags are OR-ed and the validation date is set to the value returned by
 * {@code ModelSupport.oldest}.
 *
 * @param left the relation to merge into
 * @param right the relation providing the additional values
 * @return the left relation, updated
 * @throws IllegalArgumentException when source, target, relType, subRelType or relClass differ, or when a
 *         validation date cannot be parsed (the original ParseException is preserved as cause)
 */
public static <T extends Oaf> T mergeRelation(T left, T right) {
    final Relation original = (Relation) left;
    final Relation enrich = (Relation) right;

    checkArgument(Objects.equals(original.getSource(), enrich.getSource()), "source ids must be equal");
    checkArgument(Objects.equals(original.getTarget(), enrich.getTarget()), "target ids must be equal");
    checkArgument(Objects.equals(original.getRelType(), enrich.getRelType()), "relType(s) must be equal");
    checkArgument(
        Objects.equals(original.getSubRelType(), enrich.getSubRelType()), "subRelType(s) must be equal");
    checkArgument(Objects.equals(original.getRelClass(), enrich.getRelClass()), "relClass(es) must be equal");

    original.setProvenance(mergeLists(original.getProvenance(), enrich.getProvenance()));
    original.setValidated(original.getValidated() || enrich.getValidated());

    try {
        original.setValidationDate(ModelSupport.oldest(original.getValidationDate(), enrich.getValidationDate()));
    } catch (ParseException e) {
        // keep the parsing error as cause, it tells which date could not be parsed
        throw new IllegalArgumentException(
            String
                .format(
                    "invalid validation date format in relation [s:%s, t:%s]: %s", original.getSource(),
                    original.getTarget(),
                    original.getValidationDate()),
            e);
    }
    return (T) original;
}
/**
 * Merges two results: the left one is mutated and used as the base of the merge, the right one (enrich)
 * provides the additional values.
 *
 * List fields are merged; most single-valued fields are replaced by the enrich value only when it is not null
 * and compareTrust(mergedResult, enrich) is negative (presumably: enrich is more trusted - confirm against
 * compareTrust). After the common result fields, the merge is completed by the subtype specific method when
 * both results are of the same subtype.
 *
 * NOTE: the title list of the enrich result is also modified (its main title is removed from the list).
 *
 * @param left the result to merge into
 * @param right the result providing the additional values
 * @return the merged result
 */
private static <T extends Oaf> T mergeResult(T left, T right) {
Result original = (Result) left;
Result enrich = (Result) right;
// common entity fields first; mergedResult is the (mutated) original
final Result mergedResult = mergeEntityFields(original, enrich);
// APC: amount and currency are taken together from enrich, only when the amount is missing
if (StringUtils.isBlank(mergedResult.getProcessingchargeamount())) {
mergedResult.setProcessingchargeamount(enrich.getProcessingchargeamount());
mergedResult.setProcessingchargecurrency(enrich.getProcessingchargecurrency());
}
// NOTE(review): measures are already merged by mergeEntityFields, this second merge looks redundant
mergedResult.setMeasures(mergeLists(mergedResult.getMeasures(), enrich.getMeasures()));
// instances: plain merge when none of the two is an enrichment, otherwise the enrichment instances
// are applied to the instances of the other record
if (!isAnEnrichment(mergedResult) && !isAnEnrichment(enrich))
mergedResult.setInstance(mergeLists(mergedResult.getInstance(), enrich.getInstance()));
else {
final List<Instance> enrichmentInstances = isAnEnrichment(mergedResult) ? mergedResult.getInstance()
: enrich.getInstance();
final List<Instance> enrichedInstances = isAnEnrichment(mergedResult) ? enrich.getInstance()
: mergedResult.getInstance();
// when the base record is the enrichment, the data info of the enriched record is kept
if (isAnEnrichment(mergedResult))
mergedResult.setDataInfo(enrich.getDataInfo());
mergedResult.setInstance(enrichInstances(enrichedInstances, enrichmentInstances));
}
// best access right: the enrich one replaces the current one when it compares lower
// according to the AccessRightComparator
if (enrich.getBestaccessright() != null
&& new AccessRightComparator<>()
.compare(enrich.getBestaccessright(), mergedResult.getBestaccessright()) < 0)
mergedResult.setBestaccessright(enrich.getBestaccessright());
// computed here, after the data info may have been replaced by the enrichment branch above
final int trustCompareResult = compareTrust(mergedResult, enrich);
if (enrich.getResulttype() != null && trustCompareResult < 0)
mergedResult.setResulttype(enrich.getResulttype());
if (enrich.getLanguage() != null && trustCompareResult < 0)
mergedResult.setLanguage(enrich.getLanguage());
// date of acceptance: also taken from enrich when missing in the base record
if (Objects.nonNull(enrich.getDateofacceptance())) {
if (Objects.isNull(mergedResult.getDateofacceptance()) || trustCompareResult < 0) {
mergedResult.setDateofacceptance(enrich.getDateofacceptance());
}
}
mergedResult.setCountry(mergeLists(mergedResult.getCountry(), enrich.getCountry()));
mergedResult.setSubject(mergeLists(mergedResult.getSubject(), enrich.getSubject()));
if (enrich.getJournal() != null && trustCompareResult < 0)
mergedResult.setJournal(enrich.getJournal());
// merge title lists: main title with higher trust and distinct between the others
// step 1: detach the main title (if any) from the base title list
StructuredProperty baseMainTitle = null;
if (mergedResult.getTitle() != null) {
baseMainTitle = getMainTitle(mergedResult.getTitle());
if (baseMainTitle != null) {
final StructuredProperty p = baseMainTitle;
mergedResult
.setTitle(mergedResult.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
}
}
// step 2: detach the main title (if any) from the enrich title list
StructuredProperty newMainTitle = null;
if (enrich.getTitle() != null) {
newMainTitle = getMainTitle(enrich.getTitle());
if (newMainTitle != null) {
final StructuredProperty p = newMainTitle;
enrich.setTitle(enrich.getTitle().stream().filter(t -> t != p).collect(Collectors.toList()));
}
}
// step 3: choose the main title, merge the remaining titles and append the chosen main title
if (newMainTitle != null && trustCompareResult < 0) {
baseMainTitle = newMainTitle;
}
mergedResult.setTitle(mergeLists(mergedResult.getTitle(), enrich.getTitle()));
if (mergedResult.getTitle() != null && baseMainTitle != null) {
mergedResult.getTitle().add(baseMainTitle);
}
mergedResult.setRelevantdate(mergeLists(mergedResult.getRelevantdate(), enrich.getRelevantdate()));
// descriptions are not merged: one of the two lists is chosen by longestLists
mergedResult.setDescription(longestLists(mergedResult.getDescription(), enrich.getDescription()));
if (enrich.getPublisher() != null && trustCompareResult < 0)
mergedResult.setPublisher(enrich.getPublisher());
if (enrich.getEmbargoenddate() != null && trustCompareResult < 0)
mergedResult.setEmbargoenddate(enrich.getEmbargoenddate());
mergedResult.setSource(mergeLists(mergedResult.getSource(), enrich.getSource()));
mergedResult.setFulltext(mergeLists(mergedResult.getFulltext(), enrich.getFulltext()));
mergedResult.setFormat(mergeLists(mergedResult.getFormat(), enrich.getFormat()));
mergedResult.setContributor(mergeLists(mergedResult.getContributor(), enrich.getContributor()));
// NOTE(review): unlike the other single-valued fields, resourcetype is replaced regardless of the trust
if (enrich.getResourcetype() != null)
mergedResult.setResourcetype(enrich.getResourcetype());
mergedResult.setCoverage(mergeLists(mergedResult.getCoverage(), enrich.getCoverage()));
mergedResult.setContext(mergeLists(mergedResult.getContext(), enrich.getContext()));
mergedResult
.setExternalReference(mergeLists(mergedResult.getExternalReference(), enrich.getExternalReference()));
if (enrich.getOaiprovenance() != null && trustCompareResult < 0)
mergedResult.setOaiprovenance(enrich.getOaiprovenance());
// subtype specific fields: each of the methods below also takes care of merging the data info
if (sameClass(mergedResult, enrich, Publication.class)) {
return (T) mergePublication(mergedResult, enrich);
}
if (sameClass(mergedResult, enrich, Dataset.class)) {
return (T) mergeDataset(mergedResult, enrich);
}
if (sameClass(mergedResult, enrich, OtherResearchProduct.class)) {
return (T) mergeORP(mergedResult, enrich);
}
if (sameClass(mergedResult, enrich, Software.class)) {
return (T) mergeSoftware(mergedResult, enrich);
}
// results of different subtypes: only the data info is left to merge
mergeEntityDataInfo(original, enrich);
return (T) mergedResult;
}
/**
 * Merges the fields specific to OtherResearchProduct (contact persons, contact groups, tools) and the data info.
 * The left object is mutated and returned.
 *
 * @param left the product to merge into
 * @param right the product providing the additional values
 * @return the left product, updated
 */
private static <T extends Oaf> T mergeORP(T left, T right) {
    final OtherResearchProduct target = (OtherResearchProduct) left;
    final OtherResearchProduct other = (OtherResearchProduct) right;

    target.setContactperson(mergeLists(target.getContactperson(), other.getContactperson()));
    target.setContactgroup(mergeLists(target.getContactgroup(), other.getContactgroup()));
    target.setTool(mergeLists(target.getTool(), other.getTool()));

    mergeEntityDataInfo(target, other);
    return (T) target;
}
/**
 * Merges the fields specific to Software and the data info. The left object is mutated and returned.
 * Documentation urls are merged; code repository url and programming language are taken from right when they
 * are not null and compareTrust(left, right) is negative.
 *
 * @param left the software to merge into
 * @param right the software providing the additional values
 * @return the left software, updated
 */
private static <T extends Oaf> T mergeSoftware(T left, T right) {
    final Software original = (Software) left;
    final Software enrich = (Software) right;

    original
        .setDocumentationUrl(mergeLists(original.getDocumentationUrl(), enrich.getDocumentationUrl()));

    // the trust comparison is evaluated once, as done in mergeOrganization and mergeProject
    final boolean preferEnrich = compareTrust(original, enrich) < 0;
    if (preferEnrich) {
        if (enrich.getCodeRepositoryUrl() != null) {
            original.setCodeRepositoryUrl(enrich.getCodeRepositoryUrl());
        }
        if (enrich.getProgrammingLanguage() != null) {
            original.setProgrammingLanguage(enrich.getProgrammingLanguage());
        }
    }

    mergeEntityDataInfo(original, enrich);
    return (T) original;
}
/**
 * Merges the fields specific to Dataset and the data info. The left object is mutated and returned.
 * Geolocations are merged; the single-valued fields are taken from right when they are not null and
 * compareTrust(left, right) is negative.
 *
 * @param left the dataset to merge into
 * @param right the dataset providing the additional values
 * @return the left dataset, updated
 */
private static <T extends Oaf> T mergeDataset(T left, T right) {
    final Dataset original = (Dataset) left;
    final Dataset enrich = (Dataset) right;

    // the trust comparison is evaluated once, as done in mergeOrganization and mergeProject
    final boolean preferEnrich = compareTrust(original, enrich) < 0;
    if (preferEnrich) {
        if (enrich.getStoragedate() != null) {
            original.setStoragedate(enrich.getStoragedate());
        }
        if (enrich.getDevice() != null) {
            original.setDevice(enrich.getDevice());
        }
        if (enrich.getSize() != null) {
            original.setSize(enrich.getSize());
        }
        if (enrich.getVersion() != null) {
            original.setVersion(enrich.getVersion());
        }
        if (enrich.getLastmetadataupdate() != null) {
            original.setLastmetadataupdate(enrich.getLastmetadataupdate());
        }
        if (enrich.getMetadataversionnumber() != null) {
            original.setMetadataversionnumber(enrich.getMetadataversionnumber());
        }
    }

    original.setGeolocation(mergeLists(original.getGeolocation(), enrich.getGeolocation()));

    mergeEntityDataInfo(original, enrich);
    return (T) original;
}
/**
 * Completes the merge of two publications. There are no publication specific fields to merge at the moment:
 * only the data info is merged.
 *
 * @param original the publication to merge into
 * @param enrich the publication providing the additional values
 * @return the original publication
 */
private static <T extends Oaf> T mergePublication(T original, T enrich) {
    // placeholder for the publication specific fields
    mergeEntityDataInfo(original, enrich);
    return original;
}
/**
 * Merges two organizations. The left one is mutated and returned.
 * After the common entity fields, alternative names are merged, while each single-valued field is taken from
 * right only when it is not null and compareTrust(left, right) is negative; otherwise the current value is
 * preserved (including the legal short name, which is no longer overwritten with the legal name).
 *
 * @param left the organization to merge into
 * @param right the organization providing the additional values
 * @return the left organization, updated
 */
private static <T extends Oaf> T mergeOrganization(T left, T right) {
    final Organization original = (Organization) left;
    final Organization enrich = (Organization) right;

    final Organization mergedOrganization = mergeEntityFields(original, enrich);

    final boolean preferEnrich = compareTrust(mergedOrganization, enrich) < 0;

    if (preferEnrich && enrich.getLegalshortname() != null) {
        mergedOrganization.setLegalshortname(enrich.getLegalshortname());
    }
    if (preferEnrich && enrich.getLegalname() != null) {
        mergedOrganization.setLegalname(enrich.getLegalname());
    }

    mergedOrganization
        .setAlternativeNames(mergeLists(enrich.getAlternativeNames(), mergedOrganization.getAlternativeNames()));

    if (preferEnrich) {
        if (enrich.getWebsiteurl() != null) {
            mergedOrganization.setWebsiteurl(enrich.getWebsiteurl());
        }
        if (enrich.getLogourl() != null) {
            mergedOrganization.setLogourl(enrich.getLogourl());
        }
        if (enrich.getEclegalbody() != null) {
            mergedOrganization.setEclegalbody(enrich.getEclegalbody());
        }
        if (enrich.getEclegalperson() != null) {
            mergedOrganization.setEclegalperson(enrich.getEclegalperson());
        }
        if (enrich.getEcnonprofit() != null) {
            mergedOrganization.setEcnonprofit(enrich.getEcnonprofit());
        }
        if (enrich.getEcresearchorganization() != null) {
            mergedOrganization.setEcresearchorganization(enrich.getEcresearchorganization());
        }
        if (enrich.getEchighereducation() != null) {
            mergedOrganization.setEchighereducation(enrich.getEchighereducation());
        }
        if (enrich.getEcinternationalorganizationeurinterests() != null) {
            mergedOrganization
                .setEcinternationalorganizationeurinterests(enrich.getEcinternationalorganizationeurinterests());
        }
        if (enrich.getEcinternationalorganization() != null) {
            mergedOrganization.setEcinternationalorganization(enrich.getEcinternationalorganization());
        }
        if (enrich.getEcenterprise() != null) {
            mergedOrganization.setEcenterprise(enrich.getEcenterprise());
        }
        if (enrich.getEcsmevalidated() != null) {
            mergedOrganization.setEcsmevalidated(enrich.getEcsmevalidated());
        }
        if (enrich.getEcnutscode() != null) {
            mergedOrganization.setEcnutscode(enrich.getEcnutscode());
        }
        if (enrich.getCountry() != null) {
            mergedOrganization.setCountry(enrich.getCountry());
        }
    }

    mergeEntityDataInfo(mergedOrganization, enrich);
    return (T) mergedOrganization;
}
/**
 * Merges two projects. The left one is mutated and returned.
 * After the common entity fields, subjects, funding trees and H2020 classifications are merged, while each
 * single-valued field is taken from right only when it is not null and compareTrust(left, right) is negative.
 * The H2020 topic code and description are taken together from right when the current topic code is empty.
 *
 * @param left the project to merge into
 * @param right the project providing the additional values
 * @return the left project, updated
 */
public static <T extends Oaf> T mergeProject(T left, T right) {
    final Project target = (Project) left;
    final Project other = (Project) right;

    final Project merged = mergeEntityFields(target, other);

    final boolean preferOther = compareTrust(merged, other) < 0;

    if (preferOther && other.getWebsiteurl() != null) {
        merged.setWebsiteurl(other.getWebsiteurl());
    }
    if (preferOther && other.getCode() != null) {
        merged.setCode(other.getCode());
    }
    if (preferOther && other.getAcronym() != null) {
        merged.setAcronym(other.getAcronym());
    }
    if (preferOther && other.getTitle() != null) {
        merged.setTitle(other.getTitle());
    }
    if (preferOther && other.getStartdate() != null) {
        merged.setStartdate(other.getStartdate());
    }
    if (preferOther && other.getEnddate() != null) {
        merged.setEnddate(other.getEnddate());
    }
    if (preferOther && other.getCallidentifier() != null) {
        merged.setCallidentifier(other.getCallidentifier());
    }
    if (preferOther && other.getKeywords() != null) {
        merged.setKeywords(other.getKeywords());
    }
    if (preferOther && other.getDuration() != null) {
        merged.setDuration(other.getDuration());
    }
    if (preferOther && other.getEcsc39() != null) {
        merged.setEcsc39(other.getEcsc39());
    }
    if (preferOther && other.getOamandatepublications() != null) {
        merged.setOamandatepublications(other.getOamandatepublications());
    }
    if (preferOther && other.getEcarticle29_3() != null) {
        merged.setEcarticle29_3(other.getEcarticle29_3());
    }

    merged.setSubjects(mergeLists(merged.getSubjects(), other.getSubjects()));
    merged.setFundingtree(mergeLists(merged.getFundingtree(), other.getFundingtree()));

    if (preferOther && other.getContracttype() != null) {
        merged.setContracttype(other.getContracttype());
    }
    if (preferOther && other.getOptional1() != null) {
        merged.setOptional1(other.getOptional1());
    }
    if (preferOther && other.getOptional2() != null) {
        merged.setOptional2(other.getOptional2());
    }
    if (preferOther && other.getJsonextrainfo() != null) {
        merged.setJsonextrainfo(other.getJsonextrainfo());
    }
    if (preferOther && other.getContactfullname() != null) {
        merged.setContactfullname(other.getContactfullname());
    }
    if (preferOther && other.getContactfax() != null) {
        merged.setContactfax(other.getContactfax());
    }
    if (preferOther && other.getContactphone() != null) {
        merged.setContactphone(other.getContactphone());
    }
    if (preferOther && other.getContactemail() != null) {
        merged.setContactemail(other.getContactemail());
    }
    if (preferOther && other.getSummary() != null) {
        merged.setSummary(other.getSummary());
    }
    if (preferOther && other.getCurrency() != null) {
        merged.setCurrency(other.getCurrency());
    }

    // topic code and description travel together, and only fill a missing topic code
    if (other.getH2020topiccode() != null && StringUtils.isEmpty(merged.getH2020topiccode())) {
        merged.setH2020topiccode(other.getH2020topiccode());
        merged.setH2020topicdescription(other.getH2020topicdescription());
    }

    merged
        .setH2020classification(
            mergeLists(merged.getH2020classification(), other.getH2020classification()));

    mergeEntityDataInfo(merged, other);
    return (T) merged;
}
/**
 * Replaces the data info of the left entity with the one of the right entity when both are available and
 * either compareTrust(left, right) is negative or the left data info is marked as invisible.
 * Nothing happens when right is null or when one of the two data info is missing.
 *
 * @param left the entity whose data info may be replaced
 * @param right the entity providing the candidate data info
 */
private static <T extends Oaf> void mergeEntityDataInfo(T left, T right) {
    final Entity l = (Entity) left;
    final Entity r = (Entity) right;

    if (r == null || r.getDataInfo() == null || l.getDataInfo() == null) {
        return;
    }

    // the comparison must involve both entities: comparing right against itself never selects its data info
    if (compareTrust(l, r) < 0 || l.getDataInfo().getInvisible()) {
        l.setDataInfo(r.getDataInfo());
    }
}
/**
 * Finds the first title qualified as "main title".
 *
 * @param titles the titles to scan
 * @return the first main title in list order, null when there is none
 */
private static StructuredProperty getMainTitle(List<StructuredProperty> titles) {
    // need to check if the list of titles contains more than 1 main title? (in that case, we should choose
    // which main title to select in the list)
    return titles
        .stream()
        .filter(t -> t.getQualifier() != null && "main title".equals(t.getQualifier().getClassid()))
        .findFirst()
        .orElse(null);
}
/**
 * Chooses one of the two lists, without merging them.
 * When one list is null the other one is returned. Otherwise the list with more elements wins; when the sizes
 * are equal, list a wins only if its longest (non-null) string is strictly longer than the longest string of b.
 *
 * @param a the first list, may be null
 * @param b the second list, may be null
 * @return one of the two given lists (same instance), null when both are null
 */
public static List<String> longestLists(List<String> a, List<String> b) {
    if (a == null) {
        return b;
    }
    if (b == null) {
        return a;
    }
    if (a.size() != b.size()) {
        return a.size() > b.size() ? a : b;
    }

    // same number of elements: compare the length of the longest string of each list
    final int longestInA = a.stream().filter(Objects::nonNull).mapToInt(String::length).max().orElse(0);
    final int longestInB = b.stream().filter(Objects::nonNull).mapToInt(String::length).max().orElse(0);
    return longestInA > longestInB ? a : b;
}
/**
 * Applies the enrichment instances to the instances to be enriched.
 * For each instance the enrichments are looked up by PID first and, when none is found, by alternate
 * identifier; every matching enrichment is applied to the instance.
 *
 * @param toEnrichInstances the instances that could be enriched
 * @param enrichmentInstances the enrichment instances
 * @return the list of instances, possibly enriched; an empty list when one of the two inputs is null
 */
private static List<Instance> enrichInstances(final List<Instance> toEnrichInstances,
    final List<Instance> enrichmentInstances) {
    final List<Instance> enriched = new ArrayList<>();
    // NOTE(review): a null enrichment list yields an empty result, i.e. the instances to enrich are dropped
    if (toEnrichInstances == null || enrichmentInstances == null) {
        return enriched;
    }

    final Map<String, Instance> enrichmentsByPid = toInstanceMap(enrichmentInstances);
    for (Instance target : toEnrichInstances) {
        List<Instance> matches = findEnrichmentsByPID(target.getPid(), enrichmentsByPid);
        if (matches == null || matches.isEmpty()) {
            // no match by PID: fall back on the alternate identifiers
            matches = findEnrichmentsByPID(target.getAlternateIdentifier(), enrichmentsByPid);
        }
        if (matches != null) {
            for (Instance enrichment : matches) {
                applyEnrichment(target, enrichment);
            }
        }
        enriched.add(target);
    }
    return enriched;
}
/**
 * Indexes the enrichment instances by identifier: each valid PID and each valid alternate identifier of an
 * instance becomes a key (see {@code extractKeyFromPid}) pointing to the instance itself.
 * When the same key is produced more than once, the first instance is kept.
 *
 * @param ri the list of enrichment instances
 * @return the map from identifier key to instance
 */
private static Map<String, Instance> toInstanceMap(final List<Instance> ri) {
    final Map<String, Instance> byKey = new HashMap<>();
    for (Instance instance : ri) {
        if (instance.getPid() != null) {
            for (StructuredProperty pid : instance.getPid()) {
                if (validPid(pid)) {
                    byKey.putIfAbsent(extractKeyFromPid(pid), instance);
                }
            }
        }
        if (instance.getAlternateIdentifier() != null) {
            for (StructuredProperty altId : instance.getAlternateIdentifier()) {
                if (validPid(altId)) {
                    byKey.putIfAbsent(extractKeyFromPid(altId), instance);
                }
            }
        }
    }
    return byKey;
}
/**
 * Tells whether at least one instance of the result was collected from a datasource listed by
 * {@code IdentifierFactory.delegatedAuthorityDatasourceIds()}.
 *
 * @param r the result to inspect
 * @return {@code true} when such an instance exists; {@code false} otherwise, including when the
 *         result has no instance list
 */
private static boolean isFromDelegatedAuthority(Result r) {
    final List<Instance> instances = r.getInstance();
    if (instances == null) {
        return false;
    }
    for (final Instance inst : instances) {
        if (Objects.nonNull(inst.getCollectedfrom())) {
            final String cfId = inst.getCollectedfrom().getKey();
            if (IdentifierFactory.delegatedAuthorityDatasourceIds().contains(cfId)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Checks that a PID carries everything needed to build a lookup key: a value, a qualifier and the
 * qualifier's classid.
 *
 * @param p the PID to check, may be {@code null}
 * @return {@code true} when the PID, its value, its qualifier and the qualifier classid are all non-null
 */
private static boolean validPid(final StructuredProperty p) {
    return p != null
        && p.getValue() != null
        && p.getQualifier() != null
        && p.getQualifier().getClassid() != null;
}
/**
 * Builds the lookup key of a PID in the form {@code <classid>::<value>}, using the classid and
 * value of the PID returned by {@code CleaningFunctions.normalizePidValue}.
 *
 * @param pid the pid, may be {@code null}
 * @return the key, or {@code null} when the pid is {@code null}
 */
private static String extractKeyFromPid(final StructuredProperty pid) {
    if (pid != null) {
        final StructuredProperty cleaned = CleaningFunctions.normalizePidValue(pid);
        return cleaned.getQualifier().getClassid() + "::" + cleaned.getValue();
    }
    return null;
}
/**
 * Finds the enrichment instances matching one or more of the given PIDs.
 *
 * PIDs that are {@code null} or not valid (see {@link #validPid(StructuredProperty)}) are skipped:
 * the enrichment map is only ever keyed by valid PIDs, so they could never match and building a
 * key for them may fail.
 *
 * @param pids the list of PIDs to look up
 * @param enrichments the enrichment instances indexed by PID key
 * @return the matching enrichment instances (one entry per matching PID), or {@code null} when
 *         either argument is {@code null}
 */
private static List<Instance> findEnrichmentsByPID(final List<StructuredProperty> pids,
    final Map<String, Instance> enrichments) {
    if (pids == null || enrichments == null)
        return null;
    return pids
        .stream()
        .filter(Objects::nonNull)
        .filter(MergeUtils::validPid)
        .map(MergeUtils::extractKeyFromPid)
        .map(enrichments::get)
        .filter(Objects::nonNull)
        .collect(Collectors.toList());
}
/**
 * Tells whether an entity is an enrichment record, i.e. whether the classid of its provenance
 * action equals {@code ModelConstants.PROVENANCE_ENRICH}, ignoring case.
 *
 * @param e the entity to check
 * @return {@code true} for an enrichment; {@code false} otherwise, including when the entity has
 *         no data info or no provenance action
 */
public static boolean isAnEnrichment(Entity e) {
    return Optional
        .ofNullable(e.getDataInfo())
        .map(info -> info.getProvenanceaction())
        .map(action -> ModelConstants.PROVENANCE_ENRICH.equalsIgnoreCase(action.getClassid()))
        .orElse(false);
}
/**
 * Fills the gaps of a single instance with the values carried by an enrichment instance.
 *
 * A field of the current instance is set only when it is {@code null} and the enrichment provides
 * a non-null value; a value already present is never overwritten. Only the single-valued fields
 * listed below are handled; no list-valued field is touched by this method.
 *
 * @param currentInstance the instance to fill, modified in place; ignored when {@code null}
 * @param enrichment the enrichment instance supplying the values; ignored when {@code null}
 */
private static void applyEnrichment(final Instance currentInstance, final Instance enrichment) {
    if (currentInstance == null || enrichment == null) {
        return;
    }
    // access right
    if (currentInstance.getAccessright() == null && enrichment.getAccessright() != null) {
        currentInstance.setAccessright(enrichment.getAccessright());
    }
    // license
    if (currentInstance.getLicense() == null && enrichment.getLicense() != null) {
        currentInstance.setLicense(enrichment.getLicense());
    }
    // instance type
    if (currentInstance.getInstancetype() == null && enrichment.getInstancetype() != null) {
        currentInstance.setInstancetype(enrichment.getInstancetype());
    }
    // hosted by
    if (currentInstance.getHostedby() == null && enrichment.getHostedby() != null) {
        currentInstance.setHostedby(enrichment.getHostedby());
    }
    // distribution location
    if (currentInstance.getDistributionlocation() == null && enrichment.getDistributionlocation() != null) {
        currentInstance.setDistributionlocation(enrichment.getDistributionlocation());
    }
    // collected from
    if (currentInstance.getCollectedfrom() == null && enrichment.getCollectedfrom() != null) {
        currentInstance.setCollectedfrom(enrichment.getCollectedfrom());
    }
    // date of acceptance
    if (currentInstance.getDateofacceptance() == null && enrichment.getDateofacceptance() != null) {
        currentInstance.setDateofacceptance(enrichment.getDateofacceptance());
    }
    // processing charge amount
    if (currentInstance.getProcessingchargeamount() == null && enrichment.getProcessingchargeamount() != null) {
        currentInstance.setProcessingchargeamount(enrichment.getProcessingchargeamount());
    }
    // refereed
    if (currentInstance.getRefereed() == null && enrichment.getRefereed() != null) {
        currentInstance.setRefereed(enrichment.getRefereed());
    }
    // TODO check the other Instance fields
}
/**
 * Concatenates the given lists into a new one, skipping {@code null} lists and {@code null}
 * elements and keeping only the first occurrence of equal elements (encounter order is preserved).
 *
 * @param lists the lists to merge; individual lists may be {@code null}
 * @param <T> the element type
 * @return a new list with the distinct, non-null elements of all the lists
 */
@SafeVarargs // the varargs array is only read, never stored or exposed
private static <T> List<T> mergeLists(final List<T>... lists) {
    return Arrays
        .stream(lists)
        .filter(Objects::nonNull)
        .flatMap(List::stream)
        .filter(Objects::nonNull)
        .distinct()
        .collect(Collectors.toList());
}
/**
 * Compares two entities by the trust value of their data info, as {@link Float#compare} does.
 * An entity without data info (or without a trust value) counts as trust {@code 0}.
 *
 * @param a the first entity
 * @param b the second entity
 * @return a negative, zero or positive value when the trust of {@code a} is lower than, equal to
 *         or greater than the trust of {@code b}
 */
private static int compareTrust(Entity a, Entity b) {
    final float trustOfA = Optional
        .ofNullable(a.getDataInfo())
        .map(EntityDataInfo::getTrust)
        .orElse(0f);
    final float trustOfB = Optional
        .ofNullable(b.getDataInfo())
        .map(EntityDataInfo::getTrust)
        .orElse(0f);
    return Float.compare(trustOfA, trustOfB);
}
}

View File

@ -0,0 +1,25 @@
package eu.dnetlib.dhp.schema.oaf.utils;
/**
 * Constants shared across the model: the parts of a collection name and a set of numeric
 * upper bounds. This class only declares the values; where the bounds are enforced is not
 * visible here.
 */
public class ModelHardLimits {

    // Parts of the collection name built by getCollectionName(String).
    public static final String LAYOUT = "index";
    public static final String INTERPRETATION = "openaire";
    public static final String SEPARATOR = "-";

    // Upper bounds (counts and lengths), named after what they presumably limit.
    public static final int MAX_EXTERNAL_ENTITIES = 50;
    public static final int MAX_AUTHORS = 200;
    public static final int MAX_AUTHOR_FULLNAME_LENGTH = 1000;
    public static final int MAX_TITLE_LENGTH = 5000;
    public static final int MAX_TITLES = 10;
    public static final int MAX_ABSTRACT_LENGTH = 150000;
    public static final int MAX_INSTANCES = 10;

    /** Constants holder, not meant to be instantiated. */
    private ModelHardLimits() {
    }

    /**
     * Builds a collection name as {@code <format>-index-openaire}.
     *
     * @param format the leading part of the name
     * @return the format, the layout and the interpretation joined by the separator
     */
    public static String getCollectionName(String format) {
        return String.join(SEPARATOR, format, LAYOUT, INTERPRETATION);
    }
}

View File

@ -13,75 +13,14 @@ import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.AccessRightComparator;
public class OafMapperUtils { public class OafMapperUtils {
private OafMapperUtils() { private OafMapperUtils() {
} }
/**
 * Merges two Oaf objects, dispatching on the type of {@code left}: entities are handled by
 * {@link #mergeEntities(OafEntity, OafEntity)}, relations are merged in place into {@code left}.
 *
 * @param left the first object; its type drives the dispatch
 * @param right the second object
 * @return the merged object
 * @throws IllegalArgumentException when {@code left} is neither an entity nor a relation
 */
public static Oaf merge(final Oaf left, final Oaf right) {
    if (ModelSupport.isSubClass(left, OafEntity.class)) {
        return mergeEntities((OafEntity) left, (OafEntity) right);
    }
    if (!ModelSupport.isSubClass(left, Relation.class)) {
        throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
    }
    ((Relation) left).mergeFrom((Relation) right);
    return left;
}
/**
 * Merges two entities, dispatching on the type of {@code left}: results are handled by
 * {@link #mergeResults(Result, Result)}; datasources, organizations and projects are merged in
 * place into {@code left}.
 *
 * @param left the first entity; its type drives the dispatch
 * @param right the second entity
 * @return the merged entity
 * @throws IllegalArgumentException when {@code left} is none of the supported entity types
 */
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
    if (ModelSupport.isSubClass(left, Result.class)) {
        return mergeResults((Result) left, (Result) right);
    }
    final boolean mergedInPlace = ModelSupport.isSubClass(left, Datasource.class)
        || ModelSupport.isSubClass(left, Organization.class)
        || ModelSupport.isSubClass(left, Project.class);
    if (!mergedInPlace) {
        throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
    }
    left.mergeFrom(right);
    return left;
}
/**
 * Merges two results.
 *
 * When exactly one of them comes from a delegated authority, that one is returned as is and no
 * merge takes place. Otherwise the result ranked first by {@code ResultTypeComparator} absorbs
 * the other one; on a tie {@code right} absorbs {@code left}.
 *
 * @param left the first result
 * @param right the second result
 * @return the result that survives the merge
 */
public static Result mergeResults(Result left, Result right) {
    final boolean leftDelegated = isFromDelegatedAuthority(left);
    final boolean rightDelegated = isFromDelegatedAuthority(right);
    if (leftDelegated != rightDelegated) {
        return leftDelegated ? left : right;
    }

    final boolean leftRanksFirst = new ResultTypeComparator().compare(left, right) < 0;
    final Result survivor = leftRanksFirst ? left : right;
    final Result absorbed = leftRanksFirst ? right : left;
    survivor.mergeFrom(absorbed);
    return survivor;
}
/**
 * Checks whether any instance of the result has a collectedfrom key contained in
 * {@code IdentifierFactory.delegatedAuthorityDatasourceIds()}.
 *
 * @param r the result to inspect
 * @return {@code true} on the first matching instance; {@code false} when there is none or the
 *         instance list is {@code null}
 */
private static boolean isFromDelegatedAuthority(Result r) {
    if (r.getInstance() == null) {
        return false;
    }
    for (final Instance candidate : r.getInstance()) {
        if (candidate.getCollectedfrom() == null) {
            continue;
        }
        final String datasourceId = candidate.getCollectedfrom().getKey();
        if (IdentifierFactory.delegatedAuthorityDatasourceIds().contains(datasourceId)) {
            return true;
        }
    }
    return false;
}
public static KeyValue keyValue(final String k, final String v) { public static KeyValue keyValue(final String k, final String v) {
final KeyValue kv = new KeyValue(); final KeyValue kv = new KeyValue();
kv.setKey(k); kv.setKey(k);
@ -101,26 +40,6 @@ public class OafMapperUtils {
return list; return list;
} }
/**
 * Wraps a value and its data info into a {@code Field}.
 *
 * @param value the value to wrap
 * @param info the data info attached to the field
 * @param <T> the value type
 * @return the field, or {@code null} when the value is {@code null} or its string form is blank
 */
public static <T> Field<T> field(final T value, final DataInfo info) {
    final boolean missing = value == null || StringUtils.isBlank(value.toString());
    if (missing) {
        return null;
    }

    final Field<T> wrapped = new Field<>();
    wrapped.setValue(value);
    wrapped.setDataInfo(info);
    return wrapped;
}
/**
 * Varargs flavour of {@code listFields(DataInfo, List)}: turns the values into fields sharing
 * the same data info, dropping the values that yield no field and the repeated ones.
 *
 * @param info the data info attached to every field
 * @param values the values to wrap
 * @return the list of fields
 */
public static List<Field<String>> listFields(final DataInfo info, final String... values) {
    // The List overload runs the very same map / non-null / distinct-by-value pipeline.
    return listFields(info, Arrays.asList(values));
}
public static <T> List<T> listValues(Array values) throws SQLException { public static <T> List<T> listValues(Array values) throws SQLException {
if (Objects.isNull(values)) { if (Objects.isNull(values)) {
return null; return null;
@ -132,38 +51,26 @@ public class OafMapperUtils {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) { public static Qualifier unknown(final String schemeid) {
return values return qualifier(UNKNOWN, "Unknown", schemeid);
.stream()
.map(v -> field(v, info))
.filter(Objects::nonNull)
.filter(distinctByKey(Field::getValue))
.collect(Collectors.toList());
} }
public static Qualifier unknown(final String schemeid, final String schemename) { public static AccessRight accessRight(
return qualifier(UNKNOWN, "Unknown", schemeid, schemename); final String classid,
final String classname,
final String schemeid) {
return accessRight(classid, classname, schemeid, null);
} }
public static AccessRight accessRight( public static AccessRight accessRight(
final String classid, final String classid,
final String classname, final String classname,
final String schemeid, final String schemeid,
final String schemename) {
return accessRight(classid, classname, schemeid, schemename, null);
}
public static AccessRight accessRight(
final String classid,
final String classname,
final String schemeid,
final String schemename,
final OpenAccessRoute openAccessRoute) { final OpenAccessRoute openAccessRoute) {
final AccessRight accessRight = new AccessRight(); final AccessRight accessRight = new AccessRight();
accessRight.setClassid(classid); accessRight.setClassid(classid);
accessRight.setClassname(classname); accessRight.setClassname(classname);
accessRight.setSchemeid(schemeid); accessRight.setSchemeid(schemeid);
accessRight.setSchemename(schemename);
accessRight.setOpenAccessRoute(openAccessRoute); accessRight.setOpenAccessRoute(openAccessRoute);
return accessRight; return accessRight;
} }
@ -171,13 +78,11 @@ public class OafMapperUtils {
public static Qualifier qualifier( public static Qualifier qualifier(
final String classid, final String classid,
final String classname, final String classname,
final String schemeid, final String schemeid) {
final String schemename) {
final Qualifier q = new Qualifier(); final Qualifier q = new Qualifier();
q.setClassid(classid); q.setClassid(classid);
q.setClassname(classname); q.setClassname(classname);
q.setSchemeid(schemeid); q.setSchemeid(schemeid);
q.setSchemename(schemename);
return q; return q;
} }
@ -186,7 +91,6 @@ public class OafMapperUtils {
q.setClassid(qualifier.getClassid()); q.setClassid(qualifier.getClassid());
q.setClassname(qualifier.getClassname()); q.setClassname(qualifier.getClassname());
q.setSchemeid(qualifier.getSchemeid()); q.setSchemeid(qualifier.getSchemeid());
q.setSchemename(qualifier.getSchemename());
return q; return q;
} }
@ -195,21 +99,18 @@ public class OafMapperUtils {
final String classid, final String classid,
final String classname, final String classname,
final String schemeid, final String schemeid,
final String schemename,
final DataInfo dataInfo) { final DataInfo dataInfo) {
return subject(value, qualifier(classid, classname, schemeid, schemename), dataInfo); return subject(value, qualifier(classid, classname, schemeid), dataInfo);
} }
public static StructuredProperty structuredProperty( public static StructuredProperty structuredProperty(
final String value, final String value,
final String classid, final String classid,
final String classname, final String classname,
final String schemeid, final String schemeid) {
final String schemename,
final DataInfo dataInfo) {
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo); return structuredProperty(value, qualifier(classid, classname, schemeid));
} }
public static Subject subject( public static Subject subject(
@ -228,18 +129,57 @@ public class OafMapperUtils {
public static StructuredProperty structuredProperty( public static StructuredProperty structuredProperty(
final String value, final String value,
final Qualifier qualifier, final Qualifier qualifier) {
final DataInfo dataInfo) {
if (value == null) { if (value == null) {
return null; return null;
} }
final StructuredProperty sp = new StructuredProperty(); final StructuredProperty sp = new StructuredProperty();
sp.setValue(value); sp.setValue(value);
sp.setQualifier(qualifier); sp.setQualifier(qualifier);
sp.setDataInfo(dataInfo);
return sp; return sp;
} }
/**
 * Creates a {@code Publisher} carrying the given name.
 *
 * @param name the publisher name, set as is
 * @return the new publisher
 */
public static Publisher publisher(final String name) {
    final Publisher result = new Publisher();
    result.setName(name);
    return result;
}
/**
 * Creates a {@code License} pointing to the given url.
 *
 * @param url the license url, set as is
 * @return the new license
 */
public static License license(final String url) {
    final License result = new License();
    result.setUrl(url);
    return result;
}
/**
 * Creates an {@code AuthorPid} from a value, its qualifier and its data info.
 *
 * @param value the pid value
 * @param qualifier the qualifier describing the pid type
 * @param dataInfo the data info attached to the pid
 * @return the new author pid, or {@code null} when the value is {@code null}
 */
public static AuthorPid authorPid(
    final String value,
    final Qualifier qualifier,
    final DataInfo dataInfo) {
    if (value != null) {
        final AuthorPid pid = new AuthorPid();
        pid.setValue(value);
        pid.setQualifier(qualifier);
        pid.setDataInfo(dataInfo);
        return pid;
    }
    return null;
}
/**
 * Creates an {@code AuthorPid} whose qualifier is built from a classid and a schemeid; the
 * classid is used for both the classid and the classname of the qualifier.
 *
 * @param value the pid value
 * @param classid the qualifier classid (also used as classname)
 * @param schemeid the qualifier schemeid
 * @param dataInfo the data info attached to the pid
 * @return the new author pid, or {@code null} when the value is {@code null}
 */
public static AuthorPid authorPid(
    final String value,
    final String classid,
    final String schemeid,
    final DataInfo dataInfo) {
    if (value == null) {
        // checked here so that no qualifier is built for a pid that will not exist
        return null;
    }
    return authorPid(value, qualifier(classid, classid, schemeid), dataInfo);
}
public static ExtraInfo extraInfo( public static ExtraInfo extraInfo(
final String name, final String name,
final String value, final String value,
@ -281,8 +221,7 @@ public class OafMapperUtils {
final String name, final String name,
final String issnPrinted, final String issnPrinted,
final String issnOnline, final String issnOnline,
final String issnLinking, final String issnLinking) {
final DataInfo dataInfo) {
return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal( return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal(
name, name,
@ -295,8 +234,7 @@ public class OafMapperUtils {
null, null,
null, null,
null, null,
null, null) : null;
dataInfo) : null;
} }
public static Journal journal( public static Journal journal(
@ -310,8 +248,7 @@ public class OafMapperUtils {
final String vol, final String vol,
final String edition, final String edition,
final String conferenceplace, final String conferenceplace,
final String conferencedate, final String conferencedate) {
final DataInfo dataInfo) {
if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) { if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) {
final Journal j = new Journal(); final Journal j = new Journal();
@ -326,7 +263,6 @@ public class OafMapperUtils {
j.setEdition(edition); j.setEdition(edition);
j.setConferenceplace(conferenceplace); j.setConferenceplace(conferenceplace);
j.setConferencedate(conferencedate); j.setConferencedate(conferencedate);
j.setDataInfo(dataInfo);
return j; return j;
} else { } else {
return null; return null;
@ -340,53 +276,33 @@ public class OafMapperUtils {
} }
public static DataInfo dataInfo( public static DataInfo dataInfo(
final Boolean deletedbyinference, final float trust,
final String inferenceprovenance, final String inferenceprovenance,
final Boolean inferred, final boolean inferred,
final Boolean invisible, final Qualifier provenanceaction) {
final Qualifier provenanceaction,
final String trust) {
final DataInfo d = new DataInfo(); final DataInfo d = new DataInfo();
d.setDeletedbyinference(deletedbyinference); d.setTrust(trust);
d.setInferenceprovenance(inferenceprovenance); d.setInferenceprovenance(inferenceprovenance);
d.setInferred(inferred); d.setInferred(inferred);
d.setInvisible(invisible);
d.setProvenanceaction(provenanceaction); d.setProvenanceaction(provenanceaction);
d.setTrust(trust);
return d; return d;
} }
public static String createOpenaireId( public static EntityDataInfo dataInfo(
final int prefix, final boolean invisible,
final String originalId, final boolean deletedbyinference,
final boolean to_md5) { final float trust,
if (StringUtils.isBlank(originalId)) { final String inferenceprovenance,
return null; final boolean inferred,
} else if (to_md5) { final Qualifier provenanceaction) {
final String nsPrefix = StringUtils.substringBefore(originalId, "::"); final EntityDataInfo d = new EntityDataInfo();
final String rest = StringUtils.substringAfter(originalId, "::"); d.setTrust(trust);
return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest)); d.setInvisible(invisible);
} else { d.setDeletedbyinference(deletedbyinference);
return String.format("%s|%s", prefix, originalId); d.setInferenceprovenance(inferenceprovenance);
} d.setInferred(inferred);
} d.setProvenanceaction(provenanceaction);
return d;
public static String createOpenaireId(
final String type,
final String originalId,
final boolean to_md5) {
switch (type) {
case "datasource":
return createOpenaireId(10, originalId, to_md5);
case "organization":
return createOpenaireId(20, originalId, to_md5);
case "person":
return createOpenaireId(30, originalId, to_md5);
case "project":
return createOpenaireId(40, originalId, to_md5);
default:
return createOpenaireId(50, originalId, to_md5);
}
} }
public static String asString(final Object o) { public static String asString(final Object o) {
@ -422,71 +338,77 @@ public class OafMapperUtils {
if (StringUtils.isBlank(rights.getSchemeid())) { if (StringUtils.isBlank(rights.getSchemeid())) {
rights.setSchemeid(DNET_ACCESS_MODES); rights.setSchemeid(DNET_ACCESS_MODES);
} }
if (StringUtils.isBlank(rights.getSchemename())) {
rights.setSchemename(DNET_ACCESS_MODES);
}
return rights; return rights;
} }
return null; return null;
} }
public static KeyValue newKeyValueInstance(String key, String value, DataInfo dataInfo) {
KeyValue kv = new KeyValue();
kv.setDataInfo(dataInfo);
kv.setKey(key);
kv.setValue(value);
return kv;
}
public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) { public static Measure newMeasureInstance(String id, String value, String key, DataInfo dataInfo) {
Measure m = new Measure(); Measure m = new Measure();
m.setId(id); m.setId(id);
m.setUnit(Arrays.asList(newKeyValueInstance(key, value, dataInfo))); m.setUnit(Arrays.asList(unit(key, value, dataInfo)));
return m; return m;
} }
/**
 * Creates a {@code MeasureUnit} from a key, a value and the related data info.
 *
 * @param key the unit key
 * @param value the unit value
 * @param dataInfo the data info attached to the unit
 * @return the new measure unit
 */
public static MeasureUnit unit(String key, String value, DataInfo dataInfo) {
    final MeasureUnit measureUnit = new MeasureUnit();
    measureUnit.setDataInfo(dataInfo);
    measureUnit.setValue(value);
    measureUnit.setKey(key);
    return measureUnit;
}
public static Relation getRelation(final String source, public static Relation getRelation(final String source,
final String target, final String target,
final String relType, final Relation.RELTYPE relType,
final String subRelType, final Relation.SUBRELTYPE subRelType,
final String relClass, final Relation.RELCLASS relClass,
final OafEntity entity) { final Entity entity) {
return getRelation(source, target, relType, subRelType, relClass, entity, null); return getRelation(source, target, relType, subRelType, relClass, entity, null);
} }
public static Relation getRelation(final String source, public static Relation getRelation(final String source,
final String target, final String target,
final String relType, final Relation.RELTYPE relType,
final String subRelType, final Relation.SUBRELTYPE subRelType,
final String relClass, final Relation.RELCLASS relClass,
final OafEntity entity, final Entity entity,
final String validationDate) { final String validationDate) {
final List<Provenance> provenance = getProvenance(
entity.getCollectedfrom(), fromEntityDataInfo(entity.getDataInfo()));
return getRelation( return getRelation(
source, target, relType, subRelType, relClass, entity.getCollectedfrom(), entity.getDataInfo(), source, target, relType, subRelType, relClass, provenance, validationDate, null);
entity.getLastupdatetimestamp(), validationDate, null);
} }
public static Relation getRelation(final String source, public static Relation getRelation(final String source,
final String target, final String target,
final String relType, final Relation.RELTYPE relType,
final String subRelType, final Relation.SUBRELTYPE subRelType,
final String relClass, final Relation.RELCLASS relClass,
final List<KeyValue> collectedfrom, final List<Provenance> provenance) {
final DataInfo dataInfo,
final Long lastupdatetimestamp) {
return getRelation( return getRelation(
source, target, relType, subRelType, relClass, collectedfrom, dataInfo, lastupdatetimestamp, null, null); source, target, relType, subRelType, relClass, provenance, null, null);
} }
public static Relation getRelation(final String source, public static Relation getRelation(final String source,
final String target, final String target,
final String relType, final Relation.RELTYPE relType,
final String subRelType, final Relation.SUBRELTYPE subRelType,
final String relClass, final Relation.RELCLASS relClass,
final List<KeyValue> collectedfrom, final List<Provenance> provenance,
final DataInfo dataInfo, final List<KeyValue> properties) {
final Long lastupdatetimestamp, return getRelation(
source, target, relType, subRelType, relClass, provenance, null, properties);
}
public static Relation getRelation(final String source,
final String target,
final Relation.RELTYPE relType,
final Relation.SUBRELTYPE subRelType,
final Relation.RELCLASS relClass,
final List<Provenance> provenance,
final String validationDate, final String validationDate,
final List<KeyValue> properties) { final List<KeyValue> properties) {
final Relation rel = new Relation(); final Relation rel = new Relation();
@ -495,15 +417,27 @@ public class OafMapperUtils {
rel.setRelClass(relClass); rel.setRelClass(relClass);
rel.setSource(source); rel.setSource(source);
rel.setTarget(target); rel.setTarget(target);
rel.setCollectedfrom(collectedfrom); rel.setProvenance(provenance);
rel.setDataInfo(dataInfo);
rel.setLastupdatetimestamp(lastupdatetimestamp);
rel.setValidated(StringUtils.isNotBlank(validationDate)); rel.setValidated(StringUtils.isNotBlank(validationDate));
rel.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null); rel.setValidationDate(StringUtils.isNotBlank(validationDate) ? validationDate : null);
rel.setProperties(properties); rel.setProperties(properties);
return rel; return rel;
} }
/**
 * Builds one {@code Provenance} per collectedfrom entry, all sharing the same data info.
 *
 * @param collectedfrom the datasources the information was collected from, may be {@code null}
 * @param dataInfo the data info attached to every provenance
 * @return the list of provenances, in the order of the input; empty when {@code collectedfrom}
 *         is {@code null}
 */
public static List<Provenance> getProvenance(final List<KeyValue> collectedfrom, final DataInfo dataInfo) {
    if (collectedfrom == null) {
        // an entity without collectedfrom has no provenance to report; do not fail on it
        return new java.util.ArrayList<>();
    }
    return collectedfrom
        .stream()
        .map(cf -> getProvenance(cf, dataInfo))
        .collect(Collectors.toList());
}
/**
 * Creates a {@code Provenance} from a collectedfrom entry and a data info.
 *
 * @param collectedfrom the datasource the information was collected from
 * @param dataInfo the data info attached to the provenance
 * @return the new provenance
 */
public static Provenance getProvenance(final KeyValue collectedfrom, final DataInfo dataInfo) {
    final Provenance provenance = new Provenance();
    provenance.setDataInfo(dataInfo);
    provenance.setCollectedfrom(collectedfrom);
    return provenance;
}
public static String getProvenance(DataInfo dataInfo) { public static String getProvenance(DataInfo dataInfo) {
return Optional return Optional
.ofNullable(dataInfo) .ofNullable(dataInfo)
@ -514,4 +448,13 @@ public class OafMapperUtils {
.orElse("")) .orElse(""))
.orElse(""); .orElse("");
} }
/**
 * Copies trust, inference provenance, inferred flag and provenance action of an
 * {@code EntityDataInfo} into a new plain {@code DataInfo}.
 *
 * @param entityDataInfo the entity data info to copy from, may be {@code null}
 * @return the new data info, or {@code null} when the input is {@code null}
 */
public static DataInfo fromEntityDataInfo(EntityDataInfo entityDataInfo) {
    if (entityDataInfo == null) {
        // entities are not guaranteed to carry a data info
        return null;
    }
    DataInfo dataInfo = new DataInfo();
    dataInfo.setTrust(entityDataInfo.getTrust());
    dataInfo.setInferenceprovenance(entityDataInfo.getInferenceprovenance());
    dataInfo.setInferred(entityDataInfo.getInferred());
    dataInfo.setProvenanceaction(entityDataInfo.getProvenanceaction());
    return dataInfo;
}
} }

View File

@ -0,0 +1,38 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Orders organization PIDs by the priority of their type: openorgs first, then GRID, mag_id and
 * urn; every other type ranks last. Two PIDs with the same priority compare as equal, so the
 * ordering is consistent whichever argument comes first.
 */
public class OrganizationPidComparator implements Comparator<StructuredProperty> {

    /** Rank given to the PID types without an explicit priority (and to PIDs without a qualifier). */
    private static final int LOWEST_PRIORITY = 4;

    @Override
    public int compare(StructuredProperty left, StructuredProperty right) {
        return Integer.compare(rank(left), rank(right));
    }

    /** Maps a PID to its priority rank: the lower the rank, the earlier the PID sorts. */
    private static int rank(StructuredProperty pid) {
        if (pid == null || pid.getQualifier() == null) {
            return LOWEST_PRIORITY;
        }
        switch (PidType.tryValueOf(pid.getQualifier().getClassid())) {
            case openorgs:
                return 0;
            case GRID:
                return 1;
            case mag_id:
                return 2;
            case urn:
                return 3;
            default:
                return LOWEST_PRIORITY;
        }
    }
}

View File

@ -0,0 +1,8 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.HashMap;
import java.util.HashSet;
/**
 * Map from a string key to a set of strings, used as the binding type for the
 * {@code pid_blacklist.json} resource loaded by {@code PidBlacklistProvider}, which looks entries
 * up by PID type. The values are presumably the blacklisted PID values of that type.
 */
public class PidBlacklist extends HashMap<String, HashSet<String>> {
}

View File

@ -0,0 +1,40 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.io.IOException;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Gives access to the PID blacklist, loaded once from the {@code pid_blacklist.json} classpath
 * resource (resolved relative to {@code IdentifierFactory}) when the class is initialised.
 */
public class PidBlacklistProvider {

    private static final String BLACKLIST_RESOURCE = "pid_blacklist.json";

    private static final PidBlacklist blacklist;

    static {
        // try-with-resources closes the stream; Jackson detects the JSON encoding from the bytes,
        // so the result does not depend on the platform default charset.
        try (java.io.InputStream in = IdentifierFactory.class.getResourceAsStream(BLACKLIST_RESOURCE)) {
            if (in == null) {
                throw new IllegalStateException("missing classpath resource: " + BLACKLIST_RESOURCE);
            }
            blacklist = new ObjectMapper().readValue(in, PidBlacklist.class);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the whole blacklist.
     *
     * @return the shared blacklist instance (not a copy)
     */
    public static PidBlacklist getBlacklist() {
        return blacklist;
    }

    /**
     * Returns the blacklist entries registered for a PID type.
     *
     * @param pidType the PID type to look up
     * @return the entries for the type, or a new empty set when the type has none
     */
    public static Set<String> getBlacklist(String pidType) {
        return Optional
            .<Set<String>> ofNullable(getBlacklist().get(pidType))
            .orElseGet(HashSet::new);
    }

    /** Static access only. */
    private PidBlacklistProvider() {
    }
}

View File

@ -0,0 +1,48 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
/**
 * Compares PIDs according to the kind of entity they belong to: the PIDs of a {@code Result} are
 * ordered by {@code ResultPidComparator}, those of an {@code Organization} by
 * {@code OrganizationPidComparator}, and any other entity falls back on the natural ordering of
 * the qualifier classids. {@code null} PIDs sort after non-null ones.
 *
 * @param <T> the type of the entity owning the PIDs
 */
public class PidComparator<T extends Entity> implements Comparator<StructuredProperty> {

    private final T entity;

    /**
     * @param entity the entity owning the PIDs to compare; its type selects the ordering
     */
    public PidComparator(T entity) {
        this.entity = entity;
    }

    @Override
    public int compare(StructuredProperty left, StructuredProperty right) {
        if (left == null) {
            return right == null ? 0 : 1;
        }
        if (right == null) {
            return -1;
        }

        if (ModelSupport.isSubClass(entity, Result.class)) {
            return new ResultPidComparator().compare(left, right);
        }
        if (ModelSupport.isSubClass(entity, Organization.class)) {
            return new OrganizationPidComparator().compare(left, right);
        }

        // Unlikely: neither a result nor an organization, order by classid.
        final String leftClassid = left.getQualifier().getClassid();
        final String rightClassid = right.getQualifier().getClassid();
        return leftClassid.compareTo(rightClassid);
    }
}

View File

@ -0,0 +1,79 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import org.apache.commons.lang3.EnumUtils;
/**
 * The persistent identifier (PID) types known to the model.
 */
public enum PidType {

    /**
     * The DOI syntax shall be made up of a DOI prefix and a DOI suffix separated by a forward slash.
     *
     * There is no defined limit on the length of the DOI name, or of the DOI prefix or DOI suffix.
     *
     * The DOI name is case-insensitive and can incorporate any printable characters from the legal graphic characters
     * of Unicode. Further constraints on character use (e.g. use of language-specific alphanumeric characters) can be
     * defined for an application by the ISO 26324 Registration Authority.
     *
     *
     * DOI prefix: The DOI prefix shall be composed of a directory indicator followed by a registrant code.
     * These two components shall be separated by a full stop (period). The directory indicator shall be "10" and
     * distinguishes the entire set of character strings (prefix and suffix) as digital object identifiers within the
     * resolution system.
     *
     * Registrant code: The second element of the DOI prefix shall be the registrant code. The registrant code is a
     * unique string assigned to a registrant.
     *
     * DOI suffix: The DOI suffix shall consist of a character string of any length chosen by the registrant.
     * Each suffix shall be unique to the prefix element that precedes it. The unique suffix can be a sequential number,
     * or it might incorporate an identifier generated from or based on another system used by the registrant
     * (e.g. ISAN, ISBN, ISRC, ISSN, ISTC, ISNI; in such cases, a preferred construction for such a suffix can be
     * specified, as in Example 1).
     *
     * Source: https://www.doi.org/doi_handbook/2_Numbering.html#2.2
     */
    doi,

    /**
     * PubMed Unique Identifier (PMID)
     *
     * This field is a 1-to-8 digit accession number with no leading zeros. It is present on all records and is the
     * accession number for managing and disseminating records. PMIDs are not reused after records are deleted.
     *
     * Beginning in February 2012 PMIDs include extensions following a decimal point to account for article versions
     * (e.g., 21804956.2). All citations are considered version 1 until replaced. The extended PMID is not displayed
     * on the MEDLINE format.
     *
     * View the citation in abstract format in PubMed to access additional versions when available (see the article in
     * the Jan-Feb 2012 NLM Technical Bulletin).
     *
     * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
     */
    pmid,

    /**
     * This field contains the unique identifier for the cited article in PubMed Central. The identifier begins with the
     * prefix PMC.
     *
     * Source: https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
     */
    pmc,

    handle, arXiv, nct, pdb, w3id,

    // Organization
    openorgs, corda, corda_h2020, GRID, mag_id, urn,

    // Used by dedup
    undefined, original;

    /**
     * Tells whether a string is the exact (case-sensitive) name of one of the constants.
     *
     * @param type the candidate name, may be {@code null}
     * @return {@code true} when a constant with that name exists
     */
    public static boolean isValid(String type) {
        return EnumUtils.isValidEnum(PidType.class, type);
    }

    /**
     * Lenient variant of {@code valueOf}: resolves a name to its constant without ever throwing.
     *
     * @param s the candidate name, may be {@code null}
     * @return the constant with that name, or {@link #original} when the name is {@code null} or unknown
     */
    public static PidType tryValueOf(String s) {
        // validate first rather than using a caught exception as the "unknown name" signal
        return isValid(s) ? PidType.valueOf(s) : PidType.original;
    }
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Compares PIDs by their value, after normalising them with
 * {@code CleaningFunctions.normalizePidValue}. {@code null} PIDs, and PIDs whose normalised value
 * is {@code null}, sort last; two missing values compare as equal.
 */
public class PidValueComparator implements Comparator<StructuredProperty> {

    /** Natural string ordering with {@code null} values last (and equal to each other). */
    private static final Comparator<String> VALUE_ORDER = Comparator.nullsLast(Comparator.naturalOrder());

    @Override
    public int compare(StructuredProperty left, StructuredProperty right) {
        if (left == null && right == null)
            return 0;
        if (left == null)
            return 1;
        if (right == null)
            return -1;

        final String leftValue = CleaningFunctions.normalizePidValue(left).getValue();
        final String rightValue = CleaningFunctions.normalizePidValue(right).getValue();

        return VALUE_ORDER.compare(leftValue, rightValue);
    }
}

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Comparator;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
/**
 * Orders result PIDs by the priority of their type: doi first, then pmid, pmc, handle, arXiv,
 * nct and pdb; every other type ranks last. Two PIDs with the same priority compare as equal, so
 * the ordering is consistent whichever argument comes first.
 */
public class ResultPidComparator implements Comparator<StructuredProperty> {

    /** Rank given to the PID types without an explicit priority (and to PIDs without a qualifier). */
    private static final int LOWEST_PRIORITY = 7;

    @Override
    public int compare(StructuredProperty left, StructuredProperty right) {
        return Integer.compare(rank(left), rank(right));
    }

    /** Maps a PID to its priority rank: the lower the rank, the earlier the PID sorts. */
    private static int rank(StructuredProperty pid) {
        if (pid == null || pid.getQualifier() == null) {
            return LOWEST_PRIORITY;
        }
        switch (PidType.tryValueOf(pid.getQualifier().getClassid())) {
            case doi:
                return 0;
            case pmid:
                return 1;
            case pmc:
                return 2;
            case handle:
                return 3;
            case arXiv:
                return 4;
            case nct:
                return 5;
            case pdb:
                return 6;
            default:
                return LOWEST_PRIORITY;
        }
    }
}

View File

@ -0,0 +1,77 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static eu.dnetlib.dhp.schema.common.ModelConstants.CROSSREF_ID;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Optional;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
 * Orders {@link Result} instances first by provenance, then by result type.
 * <p>
 * A result collected from Crossref precedes one that is not. Otherwise the result types decide:
 * publication, dataset, software, otherresearchproduct, in this order. {@code null} results sort
 * last.
 */
public class ResultTypeComparator implements Comparator<Result> {

    /**
     * Compares two results by Crossref provenance and result type.
     *
     * @param left  the first result, may be {@code null}
     * @param right the second result, may be {@code null}
     * @return a negative value, zero or a positive value when {@code left} sorts before, together
     *         with, or after {@code right}
     */
    @Override
    public int compare(Result left, Result right) {
        if (left == null) {
            return right == null ? 0 : 1;
        }
        if (right == null) {
            return -1;
        }

        final boolean leftFromCrossref = getCollectedFromIds(left).contains(CROSSREF_ID);
        final boolean rightFromCrossref = getCollectedFromIds(right).contains(CROSSREF_ID);
        if (leftFromCrossref != rightFromCrossref) {
            // Exactly one side comes from Crossref: that side wins.
            return leftFromCrossref ? -1 : 1;
        }

        final Result.RESULTTYPE leftType = left.getResulttype();
        final Result.RESULTTYPE rightType = right.getResulttype();
        if (leftType.equals(rightType)) {
            return 0;
        }

        // Types listed from highest to lowest precedence; the first side matching one of them wins.
        final Result.RESULTTYPE[] precedence = {
            Result.RESULTTYPE.publication,
            Result.RESULTTYPE.dataset,
            Result.RESULTTYPE.software,
            Result.RESULTTYPE.otherresearchproduct
        };
        for (Result.RESULTTYPE type : precedence) {
            if (leftType.equals(type)) {
                return -1;
            }
            if (rightType.equals(type)) {
                return 1;
            }
        }

        // Unlikely: neither type is in the precedence list, fall back to the type's natural ordering.
        return leftType.compareTo(rightType);
    }

    /**
     * Collects the keys of the datasources a result was collected from.
     *
     * @param left the result to inspect
     * @return the set of collectedfrom keys, empty when the result has no collectedfrom list
     */
    protected HashSet<String> getCollectedFromIds(Result left) {
        final HashSet<String> ids = new HashSet<>();
        Optional
            .ofNullable(left.getCollectedfrom())
            .ifPresent(cf -> cf.forEach(kv -> ids.add(kv.getKey())));
        return ids;
    }
}

View File

@ -1,25 +0,0 @@
package eu.dnetlib.scholexplorer.relation;
import java.io.Serializable;
/**
 * Serializable bean pairing a relation name with the name of its inverse relation.
 */
public class RelInfo implements Serializable {

    /** Name of the relation. */
    private String original;

    /** Name of the inverse relation. */
    private String inverse;

    /**
     * @return the name of the relation, {@code null} until set
     */
    public String getOriginal() {
        return this.original;
    }

    /**
     * @return the name of the inverse relation, {@code null} until set
     */
    public String getInverse() {
        return this.inverse;
    }

    /**
     * @param original the name of the relation
     */
    public void setOriginal(String original) {
        this.original = original;
    }

    /**
     * @param inverse the name of the inverse relation
     */
    public void setInverse(String inverse) {
        this.inverse = inverse;
    }
}

View File

@ -1,20 +0,0 @@
package eu.dnetlib.scholexplorer.relation;
import java.io.Serializable;
import java.util.HashMap;
import org.apache.commons.io.IOUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Map from relation key to {@link RelInfo}, deserialized from the {@code relations.json} classpath
 * resource resolved relative to this class.
 */
public class RelationMapper extends HashMap<String, RelInfo> implements Serializable {

    /**
     * Loads the relation map from the {@code relations.json} classpath resource.
     *
     * @return the deserialized relation map
     * @throws Exception if the resource is missing, cannot be read or is not valid JSON
     */
    public static RelationMapper load() throws Exception {
        // try-with-resources guarantees the stream is released even when parsing fails
        // (java.io.InputStream is fully qualified so that no new file-level import is needed).
        try (java.io.InputStream in = RelationMapper.class.getResourceAsStream("relations.json")) {
            if (in == null) {
                // fail with an explicit message instead of an anonymous NullPointerException
                throw new IllegalStateException("classpath resource not found: relations.json");
            }
            // Parsing the bytes directly lets Jackson detect the JSON encoding, rather than decoding
            // the stream with the platform default charset first.
            return new ObjectMapper().readValue(in, RelationMapper.class);
        }
    }
}

View File

@ -1,158 +0,0 @@
{
"cites":{
"original":"Cites",
"inverse":"IsCitedBy"
},
"compiles":{
"original":"Compiles",
"inverse":"IsCompiledBy"
},
"continues":{
"original":"Continues",
"inverse":"IsContinuedBy"
},
"derives":{
"original":"IsSourceOf",
"inverse":"IsDerivedFrom"
},
"describes":{
"original":"Describes",
"inverse":"IsDescribedBy"
},
"documents":{
"original":"Documents",
"inverse":"IsDocumentedBy"
},
"hasmetadata":{
"original":"HasMetadata",
"inverse":"IsMetadataOf"
},
"hasassociationwith":{
"original":"HasAssociationWith",
"inverse":"HasAssociationWith"
},
"haspart":{
"original":"HasPart",
"inverse":"IsPartOf"
},
"hasversion":{
"original":"HasVersion",
"inverse":"IsVersionOf"
},
"iscitedby":{
"original":"IsCitedBy",
"inverse":"Cites"
},
"iscompiledby":{
"original":"IsCompiledBy",
"inverse":"Compiles"
},
"iscontinuedby":{
"original":"IsContinuedBy",
"inverse":"Continues"
},
"isderivedfrom":{
"original":"IsDerivedFrom",
"inverse":"IsSourceOf"
},
"isdescribedby":{
"original":"IsDescribedBy",
"inverse":"Describes"
},
"isdocumentedby":{
"original":"IsDocumentedBy",
"inverse":"Documents"
},
"isidenticalto":{
"original":"IsIdenticalTo",
"inverse":"IsIdenticalTo"
},
"ismetadatafor":{
"original":"IsMetadataFor",
"inverse":"IsMetadataOf"
},
"ismetadataof":{
"original":"IsMetadataOf",
"inverse":"IsMetadataFor"
},
"isnewversionof":{
"original":"IsNewVersionOf",
"inverse":"IsPreviousVersionOf"
},
"isobsoletedby":{
"original":"IsObsoletedBy",
"inverse":"Obsoletes"
},
"isoriginalformof":{
"original":"IsOriginalFormOf",
"inverse":"IsVariantFormOf"
},
"ispartof":{
"original":"IsPartOf",
"inverse":"HasPart"
},
"ispreviousversionof":{
"original":"IsPreviousVersionOf",
"inverse":"IsNewVersionOf"
},
"isreferencedby":{
"original":"IsReferencedBy",
"inverse":"References"
},
"isrelatedto":{
"original":"IsRelatedTo",
"inverse":"IsRelatedTo"
},
"isrequiredby":{
"original":"IsRequiredBy",
"inverse":"Requires"
},
"isreviewedby":{
"original":"IsReviewedBy",
"inverse":"Reviews"
},
"issourceof":{
"original":"IsSourceOf",
"inverse":"IsDerivedFrom"
},
"issupplementedby":{
"original":"IsSupplementedBy",
"inverse":"IsSupplementTo"
},
"issupplementto":{
"original":"IsSupplementTo",
"inverse":"IsSupplementedBy"
},
"isvariantformof":{
"original":"IsVariantFormOf",
"inverse":"IsOriginalFormOf"
},
"isversionof":{
"original":"IsVersionOf",
"inverse":"HasVersion"
},
"obsoletes":{
"original":"Obsoletes",
"inverse":"IsObsoletedBy"
},
"references":{
"original":"References",
"inverse":"IsReferencedBy"
},
"requires":{
"original":"Requires",
"inverse":"IsRequiredBy"
},
"related":{
"original":"IsRelatedTo",
"inverse":"IsRelatedTo"
},
"reviews":{
"original":"Reviews",
"inverse":"IsReviewedBy"
},
"unknown":{
"original":"Unknown",
"inverse":"Unknown"
}
}

View File

@ -22,19 +22,6 @@ object ScholixUtils extends Serializable {
case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {} case class RelatedEntities(id: String, relatedDataset: Long, relatedPublication: Long) {}
val relations: Map[String, RelationVocabulary] = {
val input = Source
.fromInputStream(
getClass.getResourceAsStream("/eu/dnetlib/scholexplorer/relation/relations.json")
)
.mkString
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
json.extract[Map[String, RelationVocabulary]]
}
def extractRelationDate(relation: Relation): String = { def extractRelationDate(relation: Relation): String = {
if (relation.getProperties == null || !relation.getProperties.isEmpty) if (relation.getProperties == null || !relation.getProperties.isEmpty)
@ -175,12 +162,11 @@ object ScholixUtils extends Serializable {
} }
def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = { def extractCollectedFrom(relation: Relation): List[ScholixEntityId] = {
if (relation.getCollectedfrom != null && !relation.getCollectedfrom.isEmpty) { if (relation.getProvenance != null && !relation.getProvenance.isEmpty) {
val l: List[ScholixEntityId] = relation.getProvenance.asScala.map { p =>
val l: List[ScholixEntityId] = relation.getCollectedfrom.asScala.map { c =>
new ScholixEntityId( new ScholixEntityId(
c.getValue, p.getCollectedfrom.getValue,
List(new ScholixIdentifier(c.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava List(new ScholixIdentifier(p.getCollectedfrom.getKey, DNET_IDENTIFIER_SCHEMA, null)).asJava
) )
}.toList }.toList
l l
@ -289,11 +275,8 @@ object ScholixUtils extends Serializable {
s.setPublisher(source.getPublisher) s.setPublisher(source.getPublisher)
} }
val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null)
if (semanticRelation == null)
return null
s.setRelationship( s.setRelationship(
new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) new ScholixRelationship(relation.getRelClass.toString, "datacite", relation.getRelClass.getInverse.toString)
) )
s.setSource(source) s.setSource(source)
@ -331,12 +314,10 @@ object ScholixUtils extends Serializable {
s.setPublisher(l.asJava) s.setPublisher(l.asJava)
} }
val semanticRelation = relations.getOrElse(relation.getRelClass.toLowerCase, null)
if (semanticRelation == null)
return null
s.setRelationship( s.setRelationship(
new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) new ScholixRelationship(relation.getRelClass.toString, "datacite", relation.getRelClass.getInverse.toString)
) )
s.setSource(generateScholixResourceFromSummary(source)) s.setSource(generateScholixResourceFromSummary(source))
s s
@ -402,15 +383,15 @@ object ScholixUtils extends Serializable {
.getInstance() .getInstance()
.asScala .asScala
.filter(i => i.getDateofacceptance != null) .filter(i => i.getDateofacceptance != null)
.map(i => i.getDateofacceptance.getValue) .map(i => i.getDateofacceptance)
.toList .toList
if (dt.nonEmpty) if (dt.nonEmpty)
s.setDate(dt.distinct.asJava) s.setDate(dt.distinct.asJava)
} }
if (r.getDescription != null && !r.getDescription.isEmpty) { if (r.getDescription != null && !r.getDescription.isEmpty) {
val d = r.getDescription.asScala.find(f => f != null && f.getValue != null) val d = r.getDescription.asScala.find(f => f != null)
if (d.isDefined) if (d.isDefined)
s.setDescription(d.get.getValue) s.setDescription(d.get)
} }
if (r.getSubject != null && !r.getSubject.isEmpty) { if (r.getSubject != null && !r.getSubject.isEmpty) {
@ -422,7 +403,7 @@ object ScholixUtils extends Serializable {
} }
if (r.getPublisher != null) if (r.getPublisher != null)
s.setPublisher(List(r.getPublisher.getValue).asJava) s.setPublisher(List(r.getPublisher.getName).asJava)
if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) { if (r.getCollectedfrom != null && !r.getCollectedfrom.isEmpty) {
val cf: List[CollectedFromType] = r.getCollectedfrom.asScala val cf: List[CollectedFromType] = r.getCollectedfrom.asScala

View File

@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
public class MdStoreClientTest { public class MdStoreClientTest {
@Test
public void testMongoCollection() throws IOException { public void testMongoCollection() throws IOException {
final MdstoreClient client = new MdstoreClient("mongodb://localhost:27017", "mdstore"); final MdstoreClient client = new MdstoreClient("mongodb://localhost:27017", "mdstore");

View File

@ -0,0 +1,76 @@
package eu.dnetlib.dhp.common.vocabulary;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import static org.mockito.Mockito.lenient;
/**
 * Exercises synonym resolution on a {@link VocabularyGroup} loaded from a mocked
 * {@link ISLookUpService} that serves the vocabulary terms and synonyms from classpath fixtures.
 */
@ExtendWith(MockitoExtension.class)
public class VocabularyTest {

    @Mock
    protected ISLookUpService isLookUpService;

    protected VocabularyGroup vocabularies;

    /**
     * Stubs the lookup service with the fixture files and loads the vocabularies from it.
     */
    @BeforeEach
    public void setUpVocabulary() throws ISLookUpException, IOException {
        lenient()
            .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY))
            .thenReturn(vocs());
        lenient()
            .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
            .thenReturn(synonyms());

        vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
    }

    /** Lines of the vocabulary terms fixture. */
    private static List<String> vocs() throws IOException {
        return fixtureLines("/eu/dnetlib/dhp/transform/terms.txt");
    }

    /** Lines of the vocabulary synonyms fixture. */
    private static List<String> synonyms() throws IOException {
        return fixtureLines("/eu/dnetlib/dhp/transform/synonyms.txt");
    }

    /** Reads a mandatory classpath resource line by line. */
    private static List<String> fixtureLines(String path) throws IOException {
        return IOUtils.readLines(Objects.requireNonNull(VocabularyTest.class.getResourceAsStream(path)));
    }

    /**
     * Resolves every line of the {@code terms} resource as a synonym in
     * {@code dnet:publication_resource} and, when found, looks the resulting class id up in
     * {@code dnet:result_typologies}. Outcomes are printed only; nothing is asserted.
     */
    @Test
    void testVocabularyMatch() throws Exception {
        final String terms = IOUtils.toString(this.getClass().getResourceAsStream("terms"));

        for (String synonym : terms.split("\n")) {
            final Qualifier term = vocabularies.getSynonymAsQualifier("dnet:publication_resource", synonym);
            if (term == null) {
                System.err.println(synonym + " Missing");
                continue;
            }

            System.out.println("syn=" + synonym + " term = " + term.getClassid() + " " + term.getClassname());

            Qualifier typology = vocabularies.getSynonymAsQualifier("dnet:result_typologies", term.getClassid());
            if (typology != null) {
                System.out.println(typology.getClassname());
            }
        }
    }
}

View File

@ -0,0 +1,39 @@
package eu.dnetlib.dhp.schema.oaf.common;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
 * Unit tests for {@code ModelSupport}.
 */
public class ModelSupportTest {

    /**
     * Checks on {@code ModelSupport.isSubClass(subClass, superClass)}.
     */
    @Nested
    class IsSubClass {

        /** Relation is not expected to be reported as a subclass of Entity. */
        @Test
        void shouldReturnFalseWhenSubClassDoesNotExtendSuperClass() {
            assertFalse(ModelSupport.isSubClass(Relation.class, Entity.class));
        }

        /** Result is expected to be reported as a subclass of Entity. */
        @Test
        void shouldReturnTrueWhenSubClassExtendsSuperClass() {
            assertTrue(ModelSupport.isSubClass(Result.class, Entity.class));
        }
    }
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import java.util.Set;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
 * Sanity checks on the PID blacklist exposed by {@code PidBlacklistProvider}.
 */
class BlackListProviderTest {

    /**
     * The blacklist must exist and hold at least one entry for "doi"; asking for a PID type that is
     * not blacklisted ("xxx") must yield an empty, non-null set.
     */
    @Test
    void blackListTest() {
        // the whole blacklist and its "doi" section are available and populated
        Assertions.assertNotNull(PidBlacklistProvider.getBlacklist());
        Assertions.assertNotNull(PidBlacklistProvider.getBlacklist().get("doi"));
        Assertions.assertNotEquals(0, PidBlacklistProvider.getBlacklist().get("doi").size());

        // a type without blacklist entries resolves to an empty set rather than null
        final Set<String> unknownTypeBlacklist = PidBlacklistProvider.getBlacklist("xxx");
        Assertions.assertNotNull(unknownTypeBlacklist);
        Assertions.assertTrue(unknownTypeBlacklist.isEmpty());
    }
}

View File

@ -0,0 +1,103 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Publication;
/**
 * Tests for {@code IdentifierFactory.createIdentifier} on JSON publication fixtures, plus DOI
 * normalization through {@code CleaningFunctions.normalizePidValue}.
 */
class IdentifierFactoryTest {

    // Fixtures may carry properties unknown to the model; ignore them instead of failing.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /** Identifiers created with the md5 flag enabled. */
    @Test
    void testCreateIdentifierForPublication() throws IOException {

        verifyIdentifier(
            "publication_doi1.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
        verifyIdentifier(
            "publication_doi2.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);
        verifyIdentifier(
            "publication_doi3.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
        verifyIdentifier(
            "publication_doi4.json", "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66", true);
        verifyIdentifier(
            "publication_doi5.json", "50|doi_________::3bef95c0ca26dd55451fc8839ea69d27", true);
        verifyIdentifier(
            "publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", true);
        verifyIdentifier(
            "publication_pmc2.json", "50|pmc_________::94e4cb08c93f8733b48e2445d04002ac", true);
        verifyIdentifier(
            "publication_openapc.json", "50|doi_________::79dbc7a2a56dc1532659f9038843256e", true);

        final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
        verifyIdentifier("publication_3.json", defaultID, true);
        verifyIdentifier("publication_4.json", defaultID, true);
        verifyIdentifier("publication_5.json", defaultID, true);
    }

    /** Identifiers created with the md5 flag disabled. */
    @Test
    void testCreateIdentifierForPublicationNoHash() throws IOException {

        verifyIdentifier("publication_doi1.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
        verifyIdentifier("publication_doi2.json", "50|doi_________::10.1016/j.cmet.2010.03.013", false);
        verifyIdentifier("publication_pmc1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);
        verifyIdentifier(
            "publication_urn1.json", "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", false);

        final String defaultID = "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f";
        verifyIdentifier("publication_3.json", defaultID, false);
        verifyIdentifier("publication_4.json", defaultID, false);
        verifyIdentifier("publication_5.json", defaultID, false);
    }

    /** Identifier created from the ROHub fixture (deserialized as a Publication by the helper). */
    @Test
    void testCreateIdentifierForROHub() throws IOException {
        verifyIdentifier(
            "orp-rohub.json", "50|w3id________::afc7592914ae190a50570db90f55f9c2", true);
    }

    /**
     * Deserializes the given fixture as a {@link Publication} and checks the identifier created for it.
     *
     * @param filename   fixture name, resolved relative to this test class
     * @param expectedID the identifier expected from {@code IdentifierFactory.createIdentifier}
     * @param md5        flag forwarded to {@code IdentifierFactory.createIdentifier}
     */
    protected void verifyIdentifier(String filename, String expectedID, boolean md5) throws IOException {
        final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
        final Publication pub = OBJECT_MAPPER.readValue(json, Publication.class);

        assertEquals(expectedID, IdentifierFactory.createIdentifier(pub, md5));
    }

    /**
     * DOI normalization: lower-cases the value, strips the resolver prefix and inner blanks, and
     * rejects values that do not start with the "10." prefix.
     */
    @Test
    void testNormalizeDOI() throws Exception {

        // assertEquals takes (expected, actual): the arguments used to be swapped, which would have
        // produced misleading "expected ... but was ..." messages on failure.
        final String doi = "10.1042/BCJ20160876";
        assertEquals(doi.toLowerCase(), CleaningFunctions.normalizePidValue("doi", doi));

        final String doi2 = "0.1042/BCJ20160876";
        assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi2));

        final String doi3 = "https://doi.org/0.1042/BCJ20160876";
        assertThrows(IllegalArgumentException.class, () -> CleaningFunctions.normalizePidValue("doi", doi3));

        final String doi4 = "https://doi.org/10.1042/BCJ20160876";
        assertEquals("10.1042/BCJ20160876".toLowerCase(), CleaningFunctions.normalizePidValue("doi", doi4));

        final String doi5 = "https://doi.org/10.1042/ BCJ20160876";
        assertEquals("10.1042/BCJ20160876".toLowerCase(), CleaningFunctions.normalizePidValue("doi", doi5));
    }
}

View File

@ -0,0 +1,110 @@
package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
 * Tests for {@code MergeUtils.merge} on publication and dataset fixtures: which side determines the
 * type and id of the merged result, and how the collectedfrom entries are combined.
 */
public class MergeUtilsTest {

    // Fixtures may carry properties unknown to the model; ignore them instead of failing.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /** Crossref publication merged with a non-Crossref dataset: the publication prevails. */
    @Test
    void testMergePubs() throws IOException {
        Publication crossrefPub = read("publication_1.json", Publication.class);
        Publication otherPub = read("publication_2.json", Publication.class);
        Dataset crossrefDataset = read("dataset_1.json", Dataset.class);
        Dataset otherDataset = read("dataset_2.json", Dataset.class);

        // fixture preconditions: which records were collected from Crossref
        assertEquals(1, crossrefPub.getCollectedfrom().size());
        assertEquals(ModelConstants.CROSSREF_ID, crossrefPub.getCollectedfrom().get(0).getKey());
        assertEquals(1, otherDataset.getCollectedfrom().size());
        assertFalse(cfId(otherDataset.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));

        assertEquals(1, otherPub.getCollectedfrom().size());
        assertFalse(cfId(otherPub.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
        assertEquals(1, crossrefDataset.getCollectedfrom().size());
        assertTrue(cfId(crossrefDataset.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));

        final Result merged = MergeUtils.merge(crossrefPub, otherDataset);
        assertEquals(Result.RESULTTYPE.publication, merged.getResulttype());
        assertTrue(merged instanceof Publication);
        assertEquals(crossrefPub.getId(), merged.getId());
    }

    /** Non-Crossref publication merged with a Crossref dataset: the dataset prevails. */
    @Test
    void testMergePubs_1() throws IOException {
        Publication otherPub = read("publication_2.json", Publication.class);
        Dataset crossrefDataset = read("dataset_1.json", Dataset.class);

        final Result merged = MergeUtils.merge(otherPub, crossrefDataset);

        assertEquals(Result.RESULTTYPE.dataset, merged.getResulttype());
        assertTrue(merged instanceof Dataset);
        assertEquals(crossrefDataset.getId(), merged.getId());
        assertEquals(2, merged.getCollectedfrom().size());
    }

    /** Two publications: the first one keeps its id and both collectedfrom entries are retained. */
    @Test
    void testMergePubs_2() throws IOException {
        Publication first = read("publication_1.json", Publication.class);
        Publication second = read("publication_2.json", Publication.class);

        Result merged = MergeUtils.merge(first, second);

        assertTrue(merged instanceof Publication);
        assertEquals(first.getId(), merged.getId());
        assertEquals(2, merged.getCollectedfrom().size());
    }

    /** Dataset merged with a Zenodo-collected dataset, third argument {@code true}: the latter is returned. */
    @Test
    void testDelegatedAuthority_1() throws IOException {
        Dataset plain = read("dataset_2.json", Dataset.class);
        Dataset delegated = read("dataset_delegated.json", Dataset.class);

        assertEquals(1, delegated.getCollectedfrom().size());
        assertTrue(cfId(delegated.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));

        Result merged = MergeUtils.merge(plain, delegated, true);

        assertEquals(delegated, merged);
    }

    /** Same as above, with the first operand read from the publication fixture. */
    @Test
    void testDelegatedAuthority_2() throws IOException {
        // NOTE(review): the publication fixture is deserialized as a Dataset here - confirm this is
        // intentional and not a leftover from copying the previous test.
        Dataset fromPublicationFixture = read("publication_1.json", Dataset.class);
        Dataset delegated = read("dataset_delegated.json", Dataset.class);

        assertEquals(1, delegated.getCollectedfrom().size());
        assertTrue(cfId(delegated.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));

        Result merged = MergeUtils.merge(fromPublicationFixture, delegated, true);

        assertEquals(delegated, merged);
    }

    /**
     * @param collectedfrom the collectedfrom entries of a result
     * @return the set of their keys
     */
    protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
        final HashSet<String> keys = new HashSet<>();
        for (KeyValue entry : collectedfrom) {
            keys.add(entry.getKey());
        }
        return keys;
    }

    /**
     * Deserializes a JSON fixture, resolved relative to this test class, into the given result type.
     *
     * @param filename the fixture name
     * @param clazz    the type to deserialize into
     * @return the deserialized result
     */
    protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
        return OBJECT_MAPPER.readValue(IOUtils.toString(getClass().getResourceAsStream(filename)), clazz);
    }
}

View File

@ -142,66 +142,13 @@ class OafMapperUtilsTest {
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189")); assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367189"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222")); assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222"));
assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333")); assertEquals("2013-11-12", GraphCleaningFunctions.cleanDate("1384216367111222333"));
} }
@Test @Test
void testDate() { void testDate() {
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998"); final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date); assertNotNull(date);
System.out.println(date); assertEquals("1998-02-23", date);
}
@Test
void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class);
assertEquals(1, p1.getCollectedfrom().size());
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
OafMapperUtils
.mergeResults(p1, d2)
.getResulttype()
.getClassid());
assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(
ModelConstants.DATASET_RESULTTYPE_CLASSID,
OafMapperUtils
.mergeResults(p2, d1)
.getResulttype()
.getClassid());
}
@Test
void testDelegatedAuthority() throws IOException {
Dataset d1 = read("dataset_2.json", Dataset.class);
Dataset d2 = read("dataset_delegated.json", Dataset.class);
assertEquals(1, d2.getCollectedfrom().size());
assertTrue(cfId(d2.getCollectedfrom()).contains(ModelConstants.ZENODO_OD_ID));
Result res = OafMapperUtils.mergeResults(d1, d2);
assertEquals(d2, res);
System.out.println(OBJECT_MAPPER.writeValueAsString(res));
}
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
} }
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException { protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {

View File

@ -1,14 +0,0 @@
package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test;
/**
 * Smoke test for {@code RelationMapper.load()}.
 */
class RelationMapperTest {

    /**
     * Loads the relation map and prints each of its keys; passes as long as loading does not throw.
     */
    @Test
    void testLoadRels() throws Exception {
        for (String relationKey : RelationMapper.load().keySet()) {
            System.out.println(relationKey);
        }
    }
}

View File

@ -0,0 +1,34 @@
grant
book
report-series
report-component
book-series
peer-review
component
report
book-track
database
standard
journal-volume
proceedings-series
preprint
book-section
letter
reference-book
edited-book
journal-issue
dataset
reference-entry
dissertation
book-chapter
book-part
journal
book-set
working_paper
dissertation
other
proceedings-article
journal-article
other
proceedings
monograph

View File

@ -1 +1,28 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} {
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
"resuttype": "dataset",
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2011.03.013"
},
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
],
"collectedfrom": [
{
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
"value": "Crossref"
}
]
}

View File

@ -1,6 +1,6 @@
{ {
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
"resuttype": {"classid": "dataset"}, "resuttype": "dataset",
"pid": [ "pid": [
{ {
"qualifier": {"classid": "doi"}, "qualifier": {"classid": "doi"},
@ -30,8 +30,7 @@
"refereed": { "refereed": {
"classid": "0000", "classid": "0000",
"classname": "UNKNOWN", "classname": "UNKNOWN",
"schemeid": "dnet:review_levels", "schemeid": "dnet:review_levels"
"schemename": "dnet:review_levels"
}, },
"hostedby": { "hostedby": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
@ -40,45 +39,15 @@
"accessright": { "accessright": {
"classid": "OPEN", "classid": "OPEN",
"classname": "Open Access", "classname": "Open Access",
"schemeid": "dnet:access_modes", "schemeid": "dnet:access_modes"
"schemename": "dnet:access_modes"
},
"processingchargecurrency": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "EUR"
}, },
"processingchargecurrency": "EUR",
"pid": [ "pid": [
{ {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": { "qualifier": {
"classid": "doi", "classid": "doi",
"classname": "Digital Object Identifier", "classname": "Digital Object Identifier",
"schemeid": "dnet:pid_types", "schemeid": "dnet:pid_types"
"schemename": "dnet:pid_types"
}, },
"value": "10.1371/journal.pone.0085605" "value": "10.1371/journal.pone.0085605"
} }
@ -87,24 +56,10 @@
"url": ["https://doi.org/10.1371/journal.pone.0085605"], "url": ["https://doi.org/10.1371/journal.pone.0085605"],
"alternateIdentifier": [ "alternateIdentifier": [
{ {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": { "qualifier": {
"classid": "pmid", "classid": "pmid",
"classname": "PubMed ID", "classname": "PubMed ID",
"schemeid": "dnet:pid_types", "schemeid": "dnet:pid_types"
"schemename": "dnet:pid_types"
}, },
"value": "24454899.0" "value": "24454899.0"
} }
@ -113,27 +68,11 @@
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
"value": "Repository B" "value": "Repository B"
}, },
"processingchargeamount": { "processingchargeamount": "1022.02",
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "1022.02"
},
"instancetype": { "instancetype": {
"classid": "0004", "classid": "0004",
"classname": "Conference object", "classname": "Conference object",
"schemeid": "dnet:publication_resource", "schemeid": "dnet:publication_resource"
"schemename": "dnet:publication_resource"
} }
} }
] ]

View File

@ -1,6 +1,6 @@
{ {
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g",
"resuttype": {"classid": "dataset"}, "resuttype": "dataset",
"pid": [ "pid": [
{ {
"qualifier": {"classid": "doi"}, "qualifier": {"classid": "doi"},
@ -30,8 +30,7 @@
"refereed": { "refereed": {
"classid": "0000", "classid": "0000",
"classname": "UNKNOWN", "classname": "UNKNOWN",
"schemeid": "dnet:review_levels", "schemeid": "dnet:review_levels"
"schemename": "dnet:review_levels"
}, },
"hostedby": { "hostedby": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
@ -40,45 +39,15 @@
"accessright": { "accessright": {
"classid": "OPEN", "classid": "OPEN",
"classname": "Open Access", "classname": "Open Access",
"schemeid": "dnet:access_modes", "schemeid": "dnet:access_modes"
"schemename": "dnet:access_modes"
},
"processingchargecurrency": {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "EUR"
}, },
"processingchargecurrency": "EUR",
"pid": [ "pid": [
{ {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": { "qualifier": {
"classid": "doi", "classid": "doi",
"classname": "Digital Object Identifier", "classname": "Digital Object Identifier",
"schemeid": "dnet:pid_types", "schemeid": "dnet:pid_types"
"schemename": "dnet:pid_types"
}, },
"value": "10.1371/journal.pone.0085605" "value": "10.1371/journal.pone.0085605"
} }
@ -87,24 +56,10 @@
"url": ["https://doi.org/10.1371/journal.pone.0085605"], "url": ["https://doi.org/10.1371/journal.pone.0085605"],
"alternateIdentifier": [ "alternateIdentifier": [
{ {
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"qualifier": { "qualifier": {
"classid": "pmid", "classid": "pmid",
"classname": "PubMed ID", "classname": "PubMed ID",
"schemeid": "dnet:pid_types", "schemeid": "dnet:pid_types"
"schemename": "dnet:pid_types"
}, },
"value": "24454899.0" "value": "24454899.0"
} }
@ -113,27 +68,11 @@
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69", "key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo" "value": "Zenodo"
}, },
"processingchargeamount": { "processingchargeamount": "1022.02",
"dataInfo": {
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "Harvested",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"deletedbyinference": false,
"inferred": false,
"inferenceprovenance": "",
"invisible": true,
"trust": "0.9"
},
"value": "1022.02"
},
"instancetype": { "instancetype": {
"classid": "0004", "classid": "0004",
"classname": "Conference object", "classname": "Conference object",
"schemeid": "dnet:publication_resource", "schemeid": "dnet:publication_resource"
"schemename": "dnet:publication_resource"
} }
} }
] ]

View File

@ -0,0 +1,197 @@
{
"collectedfrom": [
{
"key": "10|fairsharing_::1b69ebedb522700034547abc5652ffac",
"value": "ROHub",
"dataInfo": null
}
],
"dataInfo": {
"invisible": false,
"inferred": false,
"deletedbyinference": false,
"trust": "0.9",
"inferenceprovenance": null,
"provenanceaction": {
"classid": "sysimport:crosswalk:repository",
"classname": "sysimport:crosswalk:repository",
"schemeid": "dnet:provenanceActions"
}
},
"lastupdatetimestamp": 1663926081966,
"id": "50|w3id________::afc7592914ae190a50570db90f55f9c2",
"originalId": [
"50|fsh_____4119::afc7592914ae190a50570db90f55f9c2",
"https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca"
],
"pid": [
{
"value": "https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca",
"qualifier": {
"classid": "w3id",
"classname": "w3id.org",
"schemeid": "dnet:pid_types"
}
}
],
"dateofcollection": "2019-03-27T15:15:22.22Z",
"dateoftransformation": "2019-04-17T16:04:20.586Z",
"extraInfo": [],
"oaiprovenance": null,
"processingchargeamount": null,
"processingchargecurrency": null,
"measures": null,
"author": [
{
"fullname": "CNR-ISMAR",
"name": "",
"surname": "",
"rank": 1,
"pid": []
}
],
"resulttype": "otherresearchproduct",
"language": {
"classid": "UNKNOWN",
"classname": "Unknown",
"schemeid": "dnet:languages"
},
"country": [],
"subject": [
{
"value": "Ecology",
"qualifier": {
"classid": "",
"classname": "",
"schemeid": ""
},
"dataInfo": {
"inferred": false,
"trust": "0.9",
"inferenceprovenance": null,
"provenanceaction": {
"classid": "sysimport:crosswalk:repository",
"classname": "sysimport:crosswalk:repository",
"schemeid": "dnet:provenanceActions"
}
}
},
{
"value": "EOSC::RO-crate",
"qualifier": {
"classid": "",
"classname": "",
"schemeid": ""
},
"dataInfo": {
"inferred": false,
"trust": "0.9",
"inferenceprovenance": null,
"provenanceaction": {
"classid": "sysimport:crosswalk:repository",
"classname": "sysimport:crosswalk:repository",
"schemeid": "dnet:provenanceActions"
}
}
}
],
"title": [
{
"value": "Using biological effects tools to define Good Environmental Status under the European Union Marine Strategy Framework Directive",
"qualifier": {
"classid": "main title",
"classname": "main title",
"schemeid": "dnet:dataCite_title"
}
}
],
"relevantdate": [
{
"value": "2018-06-20T11:21:46Z",
"qualifier": {
"classid": "UNKNOWN",
"classname": "UNKNOWN",
"schemeid": "dnet:dataCite_date"
}
}
],
"description": [
"The use of biological effects tools offer enormous potential to meet the challenges outlined by the European Union Marine Strategy Framework Directive (MSFD) whereby Member States are required to develop a robust set of tools for defining 11 qualitative descriptors of Good Environmental Status (GES), such as demonstrating that \"Concentrations of contaminants are at levels not giving rise to pollution effects\" (GES Descriptor 8). This paper discusses the combined approach of monitoring chemical contaminant levels, along side biological effect measurements relating to the effect of pollutants, for undertaking assessments of GES across European marine regions. We outline the minimum standards that biological effects tools should meet if they are to be used for defining GES in relation to Descriptor 8 and describe the current international initiatives underway to develop assessment criteria for these biological effects techniques. Crown Copyright (C) 2010 Published by Elsevier Ltd. All rights reserved."
],
"dateofacceptance": null,
"publisher": {
"name": "Poznań Supercomputing and Networking Center"
},
"embargoenddate": null,
"source": [],
"fulltext": [],
"format": [],
"contributor": [
"Generation Service"
],
"resourcetype": {
"classid": "RO-crate",
"classname": "RO-crate",
"schemeid": "dnet:dataCite_resource",
"schemename": "dnet:dataCite_resource"
},
"coverage": [],
"bestaccessright": {
"classid": "OPEN",
"classname": "Open Access",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"context": [],
"externalReference": [],
"instance": [
{
"license": null,
"accessright": {
"classid": "OPEN",
"classname": "Open Access",
"schemeid": "dnet:access_modes",
"openAccessRoute": null
},
"instancetype": {
"classid": "other research product",
"classname": "other research product",
"schemeid": "dnet:publication_resource"
},
"hostedby": {
"key": "10|fairsharing_::1b69ebedb522700034547abc5652ffac",
"value": "ROHub"
},
"url": null,
"distributionlocation": null,
"collectedfrom": {
"key": "10|fairsharing_::1b69ebedb522700034547abc5652ffac",
"value": "ROHub"
},
"pid": [
{
"value": "https://w3id.org/ro-id/0ab171a7-45c5-4194-82d4-850955504bca",
"qualifier": {
"classid": "w3id",
"classname": "w3id.org",
"schemeid": "dnet:pid_types"
}
}
],
"alternateIdentifier": [],
"dateofacceptance": null,
"processingchargeamount": null,
"processingchargecurrency": null,
"refereed": {
"classid": "UNKNOWN",
"classname": "Unknown",
"schemeid": "dnet:review_levels"
},
"measures": null
}
],
"eoscifguidelines": null,
"contactperson": [],
"contactgroup": [],
"tool": []
}

View File

@ -1 +1,28 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]} {
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"resuttype": "publication",
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2011.03.013"
},
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
],
"collectedfrom": [
{
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
"value": "Crossref"
}
]
}

View File

@ -1 +1,28 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]} {
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"resuttype": "publication",
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2011.03.013"
},
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
],
"collectedfrom": [
{
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3",
"value": "Repository A"
}
]
}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[{"qualifier":{"classid":"scp-number"},"value":"79953761260"}]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f","pid":[]}

View File

@ -0,0 +1 @@
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f"}

View File

@ -0,0 +1,33 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
"value": "Crossref"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
}
]
},
{
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2",
"value": "Crossref"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|od______2852::38861c44e6052a8d49f59a4c39ba5e66",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier": {"classid": "handle"},
"value": "11012/83840"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::2852",
"value": "Digital library of Brno University of Technology"
},
"pid": [
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
},
{
"qualifier": {"classid": "handle"},
"value": "11012/83840"
}
]
}
]
}

View File

@ -0,0 +1,37 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::358aee4cc897452c00244351e4d91f69",
"value": "Zenodo"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.5281/zenodo.5121485"
}
]
},
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}
]
}

View File

@ -0,0 +1,31 @@
{
"id": "50|openapc_____::000023f9cb6e3a247c764daec4273cbc",
"resuttype": {
"classid": "publication"
},
"instance": [
{
"collectedfrom": {
"key": "10|apc_________::e2b1600b229fc30663c8a1f662debddf",
"value": "OpenAPC Global Initiative"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
},
{
"qualifier": {"classid": "pmid"},
"value": "25811027"
}
],
"url":["https://doi.org/10.1155/2015/439379"]
}
]
}

View File

@ -0,0 +1,17 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"pid": [
{
"qualifier": {"classid": "urn"},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {"classid": "scp-number"},
"value": "79953761260"
},
{
"qualifier": {"classid": "pmc"},
"value": "21459329"
}
]
}

View File

@ -0,0 +1,21 @@
{
"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"instance": [
{
"collectedfrom": {
"key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c",
"value": "Europe PubMed Central"
},
"pid": [
{
"qualifier": {"classid": "doi"},
"value": "10.1016/j.cmet.2010.03.013"
},
{
"qualifier":{"classid":"pmc"},
"value":"21459329"
}
]
}
]
}

View File

@ -0,0 +1,23 @@
{
"id": "50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f",
"pid": [
{
"qualifier": {
"classid": "urn"
},
"value": "urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"
},
{
"qualifier": {
"classid": "scp-number"
},
"value": "79953761260"
},
{
"qualifier": {
"classid": "pmcid"
},
"value": "21459329"
}
]
}

View File

@ -4,7 +4,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-actionmanager</artifactId> <artifactId>dhp-actionmanager</artifactId>

View File

@ -1,14 +1,13 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass; import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.isSubClass;
import java.util.function.BiFunction; import java.util.function.BiFunction;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.schema.oaf.Relation;
/** OAF model merging support. */ /** OAF model merging support. */
public class MergeAndGet { public class MergeAndGet {
@ -46,38 +45,31 @@ public class MergeAndGet {
} }
private static <G extends Oaf, A extends Oaf> G mergeFromAndGet(G x, A y) { private static <G extends Oaf, A extends Oaf> G mergeFromAndGet(G x, A y) {
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) { return (G) MergeUtils.merge(x, y);
((Relation) x).mergeFrom((Relation) y);
return x;
} else if (isSubClass(x, OafEntity.class)
&& isSubClass(y, OafEntity.class)
&& isSubClass(x, y)) {
((OafEntity) x).mergeFrom((OafEntity) y);
return x;
}
throw new RuntimeException(
String
.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) { private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) if (isSubClass(x, Entity.class) && isSubClass(x, Entity.class)) {
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { Entity xE = (Entity) x;
return x; Entity yE = (Entity) y;
} else if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) { if (xE.getClass().equals(yE.getClass())
return (G) y; && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { return x;
return x; } else if (xE.getClass().equals(yE.getClass())
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) { && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
throw new RuntimeException( return (G) y;
String } else if (isSubClass(xE, yE) && xE.getLastupdatetimestamp() > yE.getLastupdatetimestamp()) {
.format( return x;
"SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s", } else if (isSubClass(xE, yE) && xE.getLastupdatetimestamp() < yE.getLastupdatetimestamp()) {
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); throw new RuntimeException(
String
.format(
"SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
} }
throw new RuntimeException( throw new RuntimeException(
String String

View File

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import java.io.IOException; import java.io.IOException;
import java.util.Optional; import java.util.Optional;
@ -26,8 +25,8 @@ import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
/** Applies a given action payload file to graph table of compatible type. */ /** Applies a given action payload file to graph table of compatible type. */
public class PromoteActionPayloadForGraphTableJob { public class PromoteActionPayloadForGraphTableJob {
@ -104,7 +103,7 @@ public class PromoteActionPayloadForGraphTableJob {
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass( private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) { Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) { if (!ModelSupport.isSubClass(rowClazz, actionPayloadClazz)) {
String msg = String String msg = String
.format( .format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s", "graph table class is not a subclass of action payload class: graph=%s, action=%s",
@ -242,11 +241,11 @@ public class PromoteActionPayloadForGraphTableJob {
private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSourceAndTarget() { private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSourceAndTarget() {
return t -> { return t -> {
if (isSubClass(t, Relation.class)) { if (ModelSupport.isSubClass(t, Relation.class)) {
final Relation rel = (Relation) t; final Relation rel = (Relation) t;
return StringUtils.isNotBlank(rel.getSource()) && StringUtils.isNotBlank(rel.getTarget()); return StringUtils.isNotBlank(rel.getSource()) && StringUtils.isNotBlank(rel.getTarget());
} }
return StringUtils.isNotBlank(((OafEntity) t).getId()); return StringUtils.isNotBlank(((Entity) t).getId());
}; };
} }

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass; import static eu.dnetlib.dhp.schema.oaf.common.ModelSupport.isSubClass;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;

View File

@ -13,6 +13,7 @@ import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
public class MergeAndGetTest { public class MergeAndGetTest {
@ -49,7 +50,7 @@ public class MergeAndGetTest {
void shouldThrowForOafAndOafEntity() { void shouldThrowForOafAndOafEntity() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
OafEntity b = mock(OafEntity.class); Entity b = mock(Entity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
@ -75,7 +76,7 @@ public class MergeAndGetTest {
void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); Entity b = mock(Entity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
@ -96,14 +97,15 @@ public class MergeAndGetTest {
// then // then
Oaf x = fn.get().apply(a, b); Oaf x = fn.get().apply(a, b);
assertTrue(Relation.class.isAssignableFrom(x.getClass())); assertTrue(Relation.class.isAssignableFrom(x.getClass()));
verify(a).mergeFrom(b); // verify(a).mergeFrom(b);
a = MergeUtils.merge(verify(a), b);
assertEquals(a, x); assertEquals(a, x);
} }
@Test @Test
void shouldThrowForOafEntityAndOaf() { void shouldThrowForOafEntityAndOaf() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
// when // when
@ -116,7 +118,7 @@ public class MergeAndGetTest {
@Test @Test
void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
// when // when
@ -129,9 +131,9 @@ public class MergeAndGetTest {
@Test @Test
void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given // given
class OafEntitySub1 extends OafEntity { class OafEntitySub1 extends Entity {
} }
class OafEntitySub2 extends OafEntity { class OafEntitySub2 extends Entity {
} }
OafEntitySub1 a = mock(OafEntitySub1.class); OafEntitySub1 a = mock(OafEntitySub1.class);
@ -147,16 +149,16 @@ public class MergeAndGetTest {
@Test @Test
void shouldBehaveProperlyForOafEntityAndOafEntity() { void shouldBehaveProperlyForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
OafEntity b = mock(OafEntity.class); Entity b = mock(Entity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
Oaf x = fn.get().apply(a, b); Oaf x = fn.get().apply(a, b);
assertTrue(OafEntity.class.isAssignableFrom(x.getClass())); assertTrue(Entity.class.isAssignableFrom(x.getClass()));
verify(a).mergeFrom(b); a = MergeUtils.merge(verify(a), b);
assertEquals(a, x); assertEquals(a, x);
} }
} }
@ -167,7 +169,7 @@ public class MergeAndGetTest {
@Test @Test
void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
// when // when
@ -181,7 +183,7 @@ public class MergeAndGetTest {
void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); Entity b = mock(Entity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
@ -193,7 +195,7 @@ public class MergeAndGetTest {
@Test @Test
void shouldThrowForOafEntityAndResult() { void shouldThrowForOafEntityAndResult() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
Result b = mock(Result.class); Result b = mock(Result.class);
// when // when
@ -223,9 +225,9 @@ public class MergeAndGetTest {
@Test @Test
void shouldShouldReturnLeftForOafEntityAndOafEntity() { void shouldShouldReturnLeftForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
when(a.getLastupdatetimestamp()).thenReturn(1L); when(a.getLastupdatetimestamp()).thenReturn(1L);
OafEntity b = mock(OafEntity.class); Entity b = mock(Entity.class);
when(b.getLastupdatetimestamp()).thenReturn(2L); when(b.getLastupdatetimestamp()).thenReturn(2L);
// when // when
@ -233,16 +235,16 @@ public class MergeAndGetTest {
// then // then
Oaf x = fn.get().apply(a, b); Oaf x = fn.get().apply(a, b);
assertTrue(OafEntity.class.isAssignableFrom(x.getClass())); assertTrue(Entity.class.isAssignableFrom(x.getClass()));
assertEquals(b, x); assertEquals(b, x);
} }
@Test @Test
void shouldShouldReturnRightForOafEntityAndOafEntity() { void shouldShouldReturnRightForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); Entity a = mock(Entity.class);
when(a.getLastupdatetimestamp()).thenReturn(2L); when(a.getLastupdatetimestamp()).thenReturn(2L);
OafEntity b = mock(OafEntity.class); Entity b = mock(Entity.class);
when(b.getLastupdatetimestamp()).thenReturn(1L); when(b.getLastupdatetimestamp()).thenReturn(1L);
// when // when
@ -250,7 +252,7 @@ public class MergeAndGetTest {
// then // then
Oaf x = fn.get().apply(a, b); Oaf x = fn.get().apply(a, b);
assertTrue(OafEntity.class.isAssignableFrom(x.getClass())); assertTrue(Entity.class.isAssignableFrom(x.getClass()));
assertEquals(a, x); assertEquals(a, x);
} }
} }

View File

@ -27,8 +27,8 @@ import org.junit.jupiter.params.provider.MethodSource;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
public class PromoteActionPayloadForGraphTableJobTest { public class PromoteActionPayloadForGraphTableJobTest {
private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader(); private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
@ -80,7 +80,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
// given // given
Class<Relation> rowClazz = Relation.class; Class<Relation> rowClazz = Relation.class;
Class<OafEntity> actionPayloadClazz = OafEntity.class; Class<Entity> actionPayloadClazz = Entity.class;
// when // when
RuntimeException exception = assertThrows( RuntimeException exception = assertThrows(

View File

@ -4,7 +4,7 @@
<parent> <parent>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
</parent> </parent>
<artifactId>dhp-aggregation</artifactId> <artifactId>dhp-aggregation</artifactId>
<build> <build>

View File

@ -12,13 +12,14 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
public class Constants { public class Constants {
public static final String DOI = "doi";
public static final String DOI_CLASSNAME = "Digital Object Identifier"; public static final String DOI_CLASSNAME = "Digital Object Identifier";
public static final String DEFAULT_DELIMITER = ","; public static final String DEFAULT_DELIMITER = ",";
@ -42,6 +43,30 @@ public class Constants {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final EntityDataInfo SciNoBo_DATA_INFO = OafMapperUtils
.dataInfo(
false,
false,
0.8f, // TODO check
"SciNoBo",
true,
OafMapperUtils
.qualifier(
ModelConstants.PROVENANCE_ENRICH,
null,
ModelConstants.DNET_PROVENANCE_ACTIONS));
public static final DataInfo Bip_DATA_INFO3 = OafMapperUtils
.dataInfo(
0.8f,
UPDATE_DATA_INFO_TYPE,
false,
OafMapperUtils
.qualifier(
UPDATE_MEASURE_BIP_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS));
private Constants() { private Constants() {
} }
@ -72,23 +97,19 @@ public class Constants {
.qualifier( .qualifier(
classid, classid,
classname, classname,
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES)); ModelConstants.DNET_SUBJECT_TYPOLOGIES));
s s
.setDataInfo( .setDataInfo(
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, 0.0f, // TODO check
UPDATE_DATA_INFO_TYPE, UPDATE_DATA_INFO_TYPE,
true, true,
false,
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
diqualifierclassid, diqualifierclassid,
UPDATE_CLASS_NAME, UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS)));
ModelConstants.DNET_PROVENANCE_ACTIONS),
""));
return s; return s;

View File

@ -40,7 +40,6 @@ import scala.Tuple2;
*/ */
public class SparkAtomicActionScoreJob implements Serializable { public class SparkAtomicActionScoreJob implements Serializable {
private static final String DOI = "doi";
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class); private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -97,7 +96,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
bipScores bipScores
.map((MapFunction<BipScore, Result>) bs -> { .map((MapFunction<BipScore, Result>) bs -> {
Result ret = new Result(); Result ret = new Result();
@ -129,25 +127,11 @@ public class SparkAtomicActionScoreJob implements Serializable {
.getUnit() .getUnit()
.stream() .stream()
.map(unit -> { .map(unit -> {
KeyValue kv = new KeyValue(); MeasureUnit u = new MeasureUnit();
kv.setValue(unit.getValue()); u.setValue(unit.getValue());
kv.setKey(unit.getKey()); u.setKey(unit.getKey());
kv u.setDataInfo(Bip_DATA_INFO3);
.setDataInfo( return u;
OafMapperUtils
.dataInfo(
false,
UPDATE_DATA_INFO_TYPE,
true,
false,
OafMapperUtils
.qualifier(
UPDATE_MEASURE_BIP_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
""));
return kv;
}) })
.collect(Collectors.toList())); .collect(Collectors.toList()));
return m; return m;

View File

@ -29,12 +29,10 @@ import eu.dnetlib.dhp.actionmanager.bipmodel.BipScore;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Measure;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class PrepareBipFinder implements Serializable { public class PrepareBipFinder implements Serializable {
@ -96,39 +94,23 @@ public class PrepareBipFinder implements Serializable {
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)) }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class))
.map((MapFunction<BipScore, Result>) v -> { .map((MapFunction<BipScore, Result>) v -> {
Result r = new Result(); Result r = new Result();
final String cleanedPid = CleaningFunctions.normalizePidValue(DOI, v.getId()); final String cleanedPid = CleaningFunctions.normalizePidValue(PidType.doi.toString(), v.getId());
r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), DOI)); r.setId(DHPUtils.generateUnresolvedIdentifier(v.getId(), PidType.doi.toString()));
Instance inst = new Instance(); Instance inst = new Instance();
inst.setMeasures(getMeasure(v));
inst /*
.setPid( * inst .setPid( Arrays .asList( OafMapperUtils .structuredProperty( cleanedPid, OafMapperUtils
Arrays * .qualifier( PidType.doi.toString(), DOI_CLASSNAME, ModelConstants.DNET_PID_TYPES,
.asList( * ModelConstants.DNET_PID_TYPES), null)));
OafMapperUtils */
.structuredProperty(
cleanedPid,
OafMapperUtils
.qualifier(
DOI, DOI_CLASSNAME,
ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES),
null)));
r.setInstance(Arrays.asList(inst)); r.setInstance(Arrays.asList(inst));
r
.setDataInfo( /*
OafMapperUtils * r .setDataInfo( OafMapperUtils .dataInfo( false, null, true, false, OafMapperUtils .qualifier(
.dataInfo( * ModelConstants.PROVENANCE_ENRICH, null, ModelConstants.DNET_PROVENANCE_ACTIONS,
false, null, true, * ModelConstants.DNET_PROVENANCE_ACTIONS), null));
false, */
OafMapperUtils
.qualifier(
ModelConstants.PROVENANCE_ENRICH,
null,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
null));
return r; return r;
}, Encoders.bean(Result.class)) }, Encoders.bean(Result.class))
.write() .write()
@ -150,25 +132,16 @@ public class PrepareBipFinder implements Serializable {
.getUnit() .getUnit()
.stream() .stream()
.map(unit -> { .map(unit -> {
KeyValue kv = new KeyValue(); MeasureUnit u = new MeasureUnit();
kv.setValue(unit.getValue()); u.setValue(u.getValue());
kv.setKey(unit.getKey()); u.setKey(u.getKey());
kv /*
.setDataInfo( * kv .setDataInfo( OafMapperUtils .dataInfo( false, UPDATE_DATA_INFO_TYPE, true, false,
OafMapperUtils * OafMapperUtils .qualifier( UPDATE_MEASURE_BIP_CLASS_ID, UPDATE_CLASS_NAME,
.dataInfo( * ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS),
false, * ""));
UPDATE_DATA_INFO_TYPE, */
true, return u;
false,
OafMapperUtils
.qualifier(
UPDATE_MEASURE_BIP_CLASS_ID,
UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
""));
return kv;
}) })
.collect(Collectors.toList())); .collect(Collectors.toList()));
return m; return m;

View File

@ -22,10 +22,12 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.FOSDataModel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class PrepareFOSSparkJob implements Serializable { public class PrepareFOSSparkJob implements Serializable {
@ -60,7 +62,6 @@ public class PrepareFOSSparkJob implements Serializable {
distributeFOSdois( distributeFOSdois(
spark, spark,
sourcePath, sourcePath,
outputPath); outputPath);
}); });
} }
@ -73,7 +74,7 @@ public class PrepareFOSSparkJob implements Serializable {
.mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> { .mapGroups((MapGroupsFunction<String, FOSDataModel, Result>) (k, it) -> {
Result r = new Result(); Result r = new Result();
FOSDataModel first = it.next(); FOSDataModel first = it.next();
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); r.setId(DHPUtils.generateUnresolvedIdentifier(k, PidType.doi.toString()));
HashSet<String> level1 = new HashSet<>(); HashSet<String> level1 = new HashSet<>();
HashSet<String> level2 = new HashSet<>(); HashSet<String> level2 = new HashSet<>();
@ -85,19 +86,7 @@ public class PrepareFOSSparkJob implements Serializable {
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID))); level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
r.setSubject(sbjs); r.setSubject(sbjs);
r r.setDataInfo(SciNoBo_DATA_INFO);
.setDataInfo(
OafMapperUtils
.dataInfo(
false, null, true,
false,
OafMapperUtils
.qualifier(
ModelConstants.PROVENANCE_ENRICH,
null,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
null));
return r; return r;
}, Encoders.bean(Result.class)) }, Encoders.bean(Result.class))
.write() .write()

View File

@ -22,10 +22,12 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.EntityDataInfo;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.schema.oaf.Subject; import eu.dnetlib.dhp.schema.oaf.Subject;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class PrepareSDGSparkJob implements Serializable { public class PrepareSDGSparkJob implements Serializable {
@ -60,7 +62,6 @@ public class PrepareSDGSparkJob implements Serializable {
doPrepare( doPrepare(
spark, spark,
sourcePath, sourcePath,
outputPath); outputPath);
}); });
} }
@ -72,7 +73,7 @@ public class PrepareSDGSparkJob implements Serializable {
.groupByKey((MapFunction<SDGDataModel, String>) r -> r.getDoi().toLowerCase(), Encoders.STRING()) .groupByKey((MapFunction<SDGDataModel, String>) r -> r.getDoi().toLowerCase(), Encoders.STRING())
.mapGroups((MapGroupsFunction<String, SDGDataModel, Result>) (k, it) -> { .mapGroups((MapGroupsFunction<String, SDGDataModel, Result>) (k, it) -> {
Result r = new Result(); Result r = new Result();
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); r.setId(DHPUtils.generateUnresolvedIdentifier(k, PidType.doi.toString()));
SDGDataModel first = it.next(); SDGDataModel first = it.next();
List<Subject> sbjs = new ArrayList<>(); List<Subject> sbjs = new ArrayList<>();
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
@ -81,19 +82,7 @@ public class PrepareSDGSparkJob implements Serializable {
s -> sbjs s -> sbjs
.add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); .add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)));
r.setSubject(sbjs); r.setSubject(sbjs);
r r.setDataInfo(SciNoBo_DATA_INFO);
.setDataInfo(
OafMapperUtils
.dataInfo(
false, null, true,
false,
OafMapperUtils
.qualifier(
ModelConstants.PROVENANCE_ENRICH,
null,
ModelConstants.DNET_PROVENANCE_ACTIONS,
ModelConstants.DNET_PROVENANCE_ACTIONS),
null));
return r; return r;
}, Encoders.bean(Result.class)) }, Encoders.bean(Result.class))
.write() .write()

View File

@ -26,17 +26,42 @@ import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import scala.Tuple2; import scala.Tuple2;
public class CreateActionSetSparkJob implements Serializable { public class CreateActionSetSparkJob implements Serializable {
public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations"; public static final String OPENCITATIONS_CLASSID = "sysimport:crosswalk:opencitations";
public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations"; public static final String OPENCITATIONS_CLASSNAME = "Imported from OpenCitations";
private static final String ID_PREFIX = "50|doi_________::"; private static final String ID_PREFIX = "50|doi_________::";
private static final String TRUST = "0.91"; private static final Float TRUST = 0.91f;
private static final KeyValue COLLECTED_FROM;
public static final DataInfo DATA_INFO;
static {
COLLECTED_FROM = new KeyValue();
COLLECTED_FROM.setKey(ModelConstants.OPENOCITATIONS_ID);
COLLECTED_FROM.setValue(ModelConstants.OPENOCITATIONS_NAME);
DATA_INFO = OafMapperUtils
.dataInfo(
TRUST,
null,
false,
OafMapperUtils
.qualifier(
OPENCITATIONS_CLASSID,
OPENCITATIONS_CLASSNAME,
ModelConstants.DNET_PROVENANCE_ACTIONS));
}
private static final List<Provenance> PROVENANCE = Arrays
.asList(
OafMapperUtils.getProvenance(COLLECTED_FROM, DATA_INFO));
private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class); private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@ -107,84 +132,39 @@ public class CreateActionSetSparkJob implements Serializable {
List<Relation> relationList = new ArrayList<>(); List<Relation> relationList = new ArrayList<>();
String citing = ID_PREFIX String citing = asOpenAireId(value.getCiting());
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCiting())); final String cited = asOpenAireId(value.getCited());
final String cited = ID_PREFIX
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", value.getCited()));
if (!citing.equals(cited)) { if (!citing.equals(cited)) {
relationList relationList.add(getRelation(citing, cited));
.addAll(
getRelations(
citing,
cited));
if (duplicate && value.getCiting().endsWith(".refs")) { if (duplicate && value.getCiting().endsWith(".refs")) {
citing = ID_PREFIX + IdentifierFactory citing = asOpenAireId(value.getCiting());
.md5( relationList.add(getRelation(citing, cited));
CleaningFunctions
.normalizePidValue(
"doi", value.getCiting().substring(0, value.getCiting().indexOf(".refs"))));
relationList.addAll(getRelations(citing, cited));
} }
} }
return relationList; return relationList;
} }
private static Collection<Relation> getRelations(String citing, String cited) { private static String asOpenAireId(String value) {
return IdentifierFactory
return Arrays .idFromPid(
.asList( "50", PidType.doi.toString(),
getRelation(citing, cited, ModelConstants.CITES), CleaningFunctions.normalizePidValue(PidType.doi.toString(), value),
getRelation(cited, citing, ModelConstants.IS_CITED_BY)); true);
} }
public static Relation getRelation( public static Relation getRelation(
String source, String source,
String target, String target) {
String relclass) {
Relation r = new Relation(); Relation r = new Relation();
r.setCollectedfrom(getCollectedFrom()); r.setProvenance(PROVENANCE);
r.setSource(source); r.setSource(source);
r.setTarget(target); r.setTarget(target);
r.setRelClass(relclass); r.setRelType(Relation.RELTYPE.resultResult);
r.setRelType(ModelConstants.RESULT_RESULT); r.setSubRelType(Relation.SUBRELTYPE.citation);
r.setSubRelType(ModelConstants.CITATION); r.setRelClass(Relation.RELCLASS.Cites);
r
.setDataInfo(
getDataInfo());
return r; return r;
} }
public static List<KeyValue> getCollectedFrom() {
KeyValue kv = new KeyValue();
kv.setKey(ModelConstants.OPENOCITATIONS_ID);
kv.setValue(ModelConstants.OPENOCITATIONS_NAME);
return Arrays.asList(kv);
}
public static DataInfo getDataInfo() {
DataInfo di = new DataInfo();
di.setInferred(false);
di.setDeletedbyinference(false);
di.setTrust(TRUST);
di
.setProvenanceaction(
getQualifier(OPENCITATIONS_CLASSID, OPENCITATIONS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS));
return di;
}
public static Qualifier getQualifier(String class_id, String class_name,
String qualifierSchema) {
Qualifier pa = new Qualifier();
pa.setClassid(class_id);
pa.setClassname(class_name);
pa.setSchemeid(qualifierSchema);
pa.setSchemename(qualifierSchema);
return pa;
}
} }

View File

@ -28,11 +28,12 @@ import eu.dnetlib.dhp.actionmanager.project.utils.model.JsonTopic;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Entity;
import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Classification;
import eu.dnetlib.dhp.schema.oaf.H2020Programme; import eu.dnetlib.dhp.schema.oaf.H2020Programme;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;
@ -157,11 +158,13 @@ public class SparkAtomicActionJob {
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.groupByKey( .groupByKey(
(MapFunction<Project, String>) OafEntity::getId, (MapFunction<Project, String>) Entity::getId,
Encoders.STRING()) Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> { .mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
Project first = it.next(); Project first = it.next();
it.forEachRemaining(first::mergeFrom); while (it.hasNext()) {
first = MergeUtils.mergeProject(first, it.next());
}
return first; return first;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.toJavaRDD() .toJavaRDD()

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.ror;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
@ -43,13 +42,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;
@ -64,11 +57,11 @@ public class GenerateRorActionSetJob {
private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues( private static final List<KeyValue> ROR_COLLECTED_FROM = listKeyValues(
"10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR"); "10|openaire____::993a7ae7a863813cf95028b50708e222", "ROR");
private static final DataInfo ROR_DATA_INFO = dataInfo( private static final EntityDataInfo ROR_DATA_INFO = dataInfo(
false, "", false, false, ENTITYREGISTRY_PROVENANCE_ACTION, "0.92"); false, false, 0.92f, null, false, ENTITYREGISTRY_PROVENANCE_ACTION);
private static final Qualifier ROR_PID_TYPE = qualifier( private static final Qualifier ROR_PID_TYPE = qualifier(
"ROR", "ROR", ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES); "ROR", "ROR", ModelConstants.DNET_PID_TYPES);
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
@ -132,11 +125,10 @@ public class GenerateRorActionSetJob {
o.setDateofcollection(now.toString()); o.setDateofcollection(now.toString());
o.setDateoftransformation(now.toString()); o.setDateoftransformation(now.toString());
o.setExtraInfo(new ArrayList<>()); // Values not present in the file o.setExtraInfo(new ArrayList<>()); // Values not present in the file
o.setOaiprovenance(null); // Values not present in the file o.setLegalshortname(r.getAcronyms().stream().findFirst().orElse(r.getName()));
o.setLegalshortname(field(r.getAcronyms().stream().findFirst().orElse(r.getName()), ROR_DATA_INFO)); o.setLegalname(r.getName());
o.setLegalname(field(r.getName(), ROR_DATA_INFO));
o.setAlternativeNames(alternativeNames(r)); o.setAlternativeNames(alternativeNames(r));
o.setWebsiteurl(field(r.getLinks().stream().findFirst().orElse(null), ROR_DATA_INFO)); o.setWebsiteurl(r.getLinks().stream().findFirst().orElse(null));
o.setLogourl(null); o.setLogourl(null);
o.setEclegalbody(null); o.setEclegalbody(null);
o.setEclegalperson(null); o.setEclegalperson(null);
@ -155,7 +147,7 @@ public class GenerateRorActionSetJob {
r.getCountry().getCountryCode(), r r.getCountry().getCountryCode(), r
.getCountry() .getCountry()
.getCountryName(), .getCountryName(),
ModelConstants.DNET_COUNTRY_TYPE, ModelConstants.DNET_COUNTRY_TYPE)); ModelConstants.DNET_COUNTRY_TYPE));
} else { } else {
o.setCountry(null); o.setCountry(null);
} }
@ -175,17 +167,17 @@ public class GenerateRorActionSetJob {
private static List<StructuredProperty> pids(final RorOrganization r) { private static List<StructuredProperty> pids(final RorOrganization r) {
final List<StructuredProperty> pids = new ArrayList<>(); final List<StructuredProperty> pids = new ArrayList<>();
pids.add(structuredProperty(r.getId(), ROR_PID_TYPE, ROR_DATA_INFO)); pids.add(structuredProperty(r.getId(), ROR_PID_TYPE));
for (final Map.Entry<String, ExternalIdType> e : r.getExternalIds().entrySet()) { for (final Map.Entry<String, ExternalIdType> e : r.getExternalIds().entrySet()) {
final String type = e.getKey(); final String type = e.getKey();
final List<String> all = e.getValue().getAll(); final List<String> all = e.getValue().getAll();
if (all != null) { if (all != null) {
final Qualifier qualifier = qualifier( final Qualifier qualifier = qualifier(
type, type, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES); type, type, ModelConstants.DNET_PID_TYPES);
for (final String pid : all) { for (final String pid : all) {
pids pids
.add(structuredProperty(pid, qualifier, ROR_DATA_INFO)); .add(structuredProperty(pid, qualifier));
} }
} }
} }
@ -193,7 +185,7 @@ public class GenerateRorActionSetJob {
return pids; return pids;
} }
private static List<Field<String>> alternativeNames(final RorOrganization r) { private static List<String> alternativeNames(final RorOrganization r) {
final Set<String> names = new LinkedHashSet<>(); final Set<String> names = new LinkedHashSet<>();
names.addAll(r.getAliases()); names.addAll(r.getAliases());
names.addAll(r.getAcronyms()); names.addAll(r.getAcronyms());
@ -202,7 +194,6 @@ public class GenerateRorActionSetJob {
return names return names
.stream() .stream()
.filter(StringUtils::isNotBlank) .filter(StringUtils::isNotBlank)
.map(s -> field(s, ROR_DATA_INFO))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }

View File

@ -151,17 +151,14 @@ public class SparkAtomicActionUsageJob implements Serializable {
private static List<Measure> getMeasure(Long downloads, Long views) { private static List<Measure> getMeasure(Long downloads, Long views) {
DataInfo dataInfo = OafMapperUtils DataInfo dataInfo = OafMapperUtils
.dataInfo( .dataInfo(
false, 0.0f, // TODO check
UPDATE_DATA_INFO_TYPE, UPDATE_DATA_INFO_TYPE,
true,
false, false,
OafMapperUtils OafMapperUtils
.qualifier( .qualifier(
UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID, UPDATE_MEASURE_USAGE_COUNTS_CLASS_ID,
UPDATE_CLASS_NAME, UPDATE_CLASS_NAME,
ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS));
ModelConstants.DNET_PROVENANCE_ACTIONS),
"");
return Arrays return Arrays
.asList( .asList(

View File

@ -32,6 +32,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.schema.mdstore.Provenance; import eu.dnetlib.dhp.schema.mdstore.Provenance;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import scala.Tuple2; import scala.Tuple2;
public class GenerateNativeStoreSparkJob { public class GenerateNativeStoreSparkJob {
@ -216,7 +217,8 @@ public class GenerateNativeStoreSparkJob {
invalidRecords.add(1); invalidRecords.add(1);
return null; return null;
} }
return new MetadataRecord(originalIdentifier, encoding, provenance, document.asXML(), dateOfCollection); final String id = ModelSupport.generateIdentifier(originalIdentifier, provenance.getNsPrefix());
return new MetadataRecord(id, originalIdentifier, encoding, provenance, document.asXML(), dateOfCollection);
} catch (Throwable e) { } catch (Throwable e) {
invalidRecords.add(1); invalidRecords.add(1);
return null; return null;

View File

@ -642,12 +642,12 @@
"PANGAEA.REPOSITORY": { "PANGAEA.REPOSITORY": {
"openaire_id": "re3data_____::r3d100010134", "openaire_id": "re3data_____::r3d100010134",
"datacite_name": "PANGAEA", "datacite_name": "PANGAEA",
"official_name": "PANGAEA" "official_name": "PANGAEA - Data Publisher for Earth and Environmental Science"
}, },
"TIB.PANGAEA": { "TIB.PANGAEA": {
"openaire_id": "re3data_____::r3d100010134", "openaire_id": "re3data_____::r3d100010134",
"datacite_name": "PANGAEA", "datacite_name": "PANGAEA",
"official_name": "PANGAEA" "official_name": "PANGAEA - Data Publisher for Earth and Environmental Science"
}, },
"NASAPDS.NASAPDS": { "NASAPDS.NASAPDS": {
"openaire_id": "re3data_____::r3d100010121", "openaire_id": "re3data_____::r3d100010121",

View File

@ -1,8 +1,8 @@
package eu.dnetlib.dhp.collection package eu.dnetlib.dhp.collection
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.schema.common.ModelSupport import eu.dnetlib.dhp.schema.oaf.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.{Oaf, OafEntity, Relation} import eu.dnetlib.dhp.schema.oaf.{Entity, Oaf, Relation}
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode} import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode}
object CollectionUtils { object CollectionUtils {
@ -17,33 +17,22 @@ object CollectionUtils {
*/ */
def fixRelations(i: Oaf): List[Oaf] = { def fixRelations(i: Oaf): List[Oaf] = {
if (i.isInstanceOf[OafEntity]) if (i.isInstanceOf[Entity])
return List(i) List(i)
else { else {
val r: Relation = i.asInstanceOf[Relation] val r: Relation = i.asInstanceOf[Relation]
val currentRel = ModelSupport.findRelation(r.getRelClass) val inverse = new Relation
if (currentRel != null) { inverse.setSource(r.getTarget)
inverse.setTarget(r.getSource)
// Cleaning relation inverse.setRelType(r.getRelType)
r.setRelType(currentRel.getRelType) inverse.setSubRelType(r.getSubRelType)
r.setSubRelType(currentRel.getSubReltype) inverse.setRelClass(r.getRelClass.getInverse)
r.setRelClass(currentRel.getRelClass) inverse.setProvenance(r.getProvenance)
val inverse = new Relation inverse.setProperties(r.getProperties)
inverse.setSource(r.getTarget) inverse.setValidated(r.getValidated)
inverse.setTarget(r.getSource) inverse.setValidationDate(r.getValidationDate)
inverse.setRelType(currentRel.getRelType) List(r, inverse)
inverse.setSubRelType(currentRel.getSubReltype)
inverse.setRelClass(currentRel.getInverseRelClass)
inverse.setCollectedfrom(r.getCollectedfrom)
inverse.setDataInfo(r.getDataInfo)
inverse.setProperties(r.getProperties)
inverse.setLastupdatetimestamp(r.getLastupdatetimestamp)
inverse.setValidated(r.getValidated)
inverse.setValidationDate(r.getValidationDate)
return List(r, inverse)
}
} }
List()
} }
def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = { def saveDataset(dataset: Dataset[Oaf], targetPath: String): Unit = {

View File

@ -0,0 +1,626 @@
package eu.dnetlib.dhp.crossref
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.common.ModelConstants.OPEN_ACCESS_RIGHT
import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils._
import eu.dnetlib.dhp.schema.oaf.utils._
import org.apache.commons.lang.StringUtils
import org.json4s
import org.json4s.DefaultFormats
import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods.parse
import org.slf4j.{Logger, LoggerFactory}
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.matching.Regex
case class CrossrefDT(doi: String, json: String, timestamp: Long) {}
case class CrossrefAuthor(givenName: String, familyName: String, ORCID: String, sequence: String, rank: Int) {}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
object CrossrefUtility {
val CROSSREF_COLLECTED_FROM = keyValue(ModelConstants.CROSSREF_ID, ModelConstants.CROSSREF_NAME)
val logger: Logger = LoggerFactory.getLogger(getClass)
def convert(input: String, vocabularies: VocabularyGroup): List[Oaf] = {
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: json4s.JValue = parse(input)
var resultList: List[Oaf] = List()
val objectType = (json \ "type").extractOrElse[String](null)
if (objectType == null)
return resultList
val resultWithType = generateItemFromType(objectType, vocabularies)
if (resultWithType == null)
return List()
val result = resultWithType._1
val cOBJCategory = resultWithType._2
val className = resultWithType._3
mappingResult(result, json, cOBJCategory, className)
if (result == null || result.getId == null)
return List()
val funderList: List[mappingFunder] =
(json \ "funder").extractOrElse[List[mappingFunder]](List())
if (funderList.nonEmpty) {
resultList = resultList ::: mappingFunderToRelations(funderList, result)
}
resultList = resultList ::: List(result)
resultList
}
private def createRelation(sourceId: String, targetId: String, relClass: Relation.RELCLASS): Relation = {
val r = new Relation
//TODO further inspect
r.setSource(sourceId)
r.setTarget(targetId)
r.setRelType(Relation.RELTYPE.resultProject)
r.setRelClass(relClass)
r.setSubRelType(Relation.SUBRELTYPE.outcome)
r.setProvenance(List(OafMapperUtils.getProvenance(CROSSREF_COLLECTED_FROM, null)).asJava)
r
}
private def generateSimpleRelationFromAward(
funder: mappingFunder,
nsPrefix: String,
extractField: String => String,
source: Result
): List[Relation] = {
if (funder.award.isDefined && funder.award.get.nonEmpty)
funder.award.get
.map(extractField)
.filter(a => a != null && a.nonEmpty)
.map(award => {
val targetId = IdentifierFactory.createOpenaireId("project", s"$nsPrefix::$award", true)
createRelation(targetId, source.getId, Relation.RELCLASS.produces)
})
else List()
}
private def extractECAward(award: String): String = {
val awardECRegex: Regex = "[0-9]{4,9}".r
if (awardECRegex.findAllIn(award).hasNext)
return awardECRegex.findAllIn(award).max
null
}
private def snsfRule(award: String): String = {
val tmp1 = StringUtils.substringAfter(award, "_")
val tmp2 = StringUtils.substringBefore(tmp1, "/")
tmp2
}
/** Translates the Crossref funder entries of a record into project -> result relations.
  *
  * A funder is resolved through its DOI when that is defined and non-empty, otherwise
  * through its name. Depending on the funder the relations are built from its awards,
  * from a fixed funder-level project identifier, or from both. Funders that match no rule
  * are only reported at debug level.
  *
  * @param funders funder entries of the record; null is treated as empty
  * @param result  the result the relations point to
  * @return the relations, in funder order
  */
private def mappingFunderToRelations(funders: List[mappingFunder], result: Result): List[Relation] = {
  // Identifier suffix shared by every funder-level (award independent) project id used below
  val funderLevelProjectSuffix = "1e5e62235d094afd01cd56e65112fc63"

  // Relations derived from the awards declared by the funder
  def byAward(funder: mappingFunder, nsPrefix: String, extractField: String => String): List[Relation] =
    generateSimpleRelationFromAward(funder, nsPrefix, extractField, result)

  // Single relation towards the fixed funder-level project of the given namespace
  def funderLevel(nsPrefix: String): List[Relation] = {
    val targetId =
      IdentifierFactory.createOpenaireId("project", s"$nsPrefix::$funderLevelProjectSuffix", false)
    List(createRelation(targetId, result.getId, Relation.RELCLASS.produces))
  }

  // Resolution driven by the funder DOI
  def byDoi(funder: mappingFunder): List[Relation] =
    funder.DOI.get match {
      case "10.13039/100010663" | "10.13039/100010661" | "10.13039/501100007601" | "10.13039/501100000780" |
          "10.13039/100010665" =>
        byAward(funder, "corda__h2020", extractECAward)
      // NOTE(review): "10.13039/501100000780" used to be repeated in this case, where it could
      // never match because the case above wins; confirm whether it was meant to yield both
      // corda__h2020 and corda_______ relations.
      case "10.13039/100011199" | "10.13039/100004431" | "10.13039/501100004963" =>
        byAward(funder, "corda_______", extractECAward)
      case "10.13039/501100000781" =>
        byAward(funder, "corda_______", extractECAward) :::
          byAward(funder, "corda__h2020", extractECAward)
      case "10.13039/100000001" =>
        byAward(funder, "nsf_________", a => a)
      case "10.13039/501100001665" =>
        byAward(funder, "anr_________", a => a)
      case "10.13039/501100002341" =>
        byAward(funder, "aka_________", a => a)
      case "10.13039/501100001602" =>
        byAward(funder, "sfi_________", a => a.replace("SFI", ""))
      case "10.13039/501100000923" =>
        byAward(funder, "arc_________", a => a)
      case "10.13039/501100000038" =>
        funderLevel("nserc_______")
      case "10.13039/501100000155" =>
        funderLevel("sshrc_______")
      case "10.13039/501100000024" =>
        funderLevel("cihr________")
      case "10.13039/501100002848" =>
        byAward(funder, "conicytf____", a => a)
      case "10.13039/501100003448" =>
        byAward(funder, "gsrt________", extractECAward)
      case "10.13039/501100010198" =>
        byAward(funder, "sgov________", a => a)
      case "10.13039/501100004564" =>
        byAward(funder, "mestd_______", extractECAward)
      case "10.13039/501100003407" =>
        byAward(funder, "miur________", a => a) ::: funderLevel("miur________")
      case "10.13039/501100006588" | "10.13039/501100004488" =>
        byAward(
          funder,
          "irb_hr______",
          a => a.replaceAll("Project No.", "").replaceAll("HRZZ-", "")
        )
      case "10.13039/501100006769" =>
        byAward(funder, "rsf_________", a => a)
      case "10.13039/501100001711" =>
        byAward(funder, "snsf________", snsfRule)
      case "10.13039/501100004410" =>
        byAward(funder, "tubitakf____", a => a)
      case "10.13039/100004440" =>
        byAward(funder, "wt__________", a => a) ::: funderLevel("wt__________")
      case _ =>
        logger.debug("no match for " + funder.DOI.get)
        List()
    }

  // Fallback resolution driven by the funder name, used when no DOI is available
  def byName(funder: mappingFunder): List[Relation] =
    funder.name match {
      case "European Unions Horizon 2020 research and innovation program" =>
        byAward(funder, "corda__h2020", extractECAward)
      case "European Union's" =>
        byAward(funder, "corda__h2020", extractECAward) :::
          byAward(funder, "corda_______", extractECAward)
      case "The French National Research Agency (ANR)" | "The French National Research Agency" =>
        byAward(funder, "anr_________", a => a)
      case "CONICYT, Programa de Formación de Capital Humano Avanzado" =>
        byAward(funder, "conicytf____", extractECAward)
      case "Wellcome Trust Masters Fellowship" =>
        byAward(funder, "wt__________", a => a) ::: funderLevel("wt__________")
      case _ =>
        logger.debug("no match for " + funder.name)
        List()
    }

  if (funders == null)
    List()
  else
    funders.flatMap { funder =>
      if (funder.DOI.isDefined && funder.DOI.get.nonEmpty) byDoi(funder)
      else byName(funder)
    }
}
/** Fills `result` with the metadata found in a Crossref JSON record.
  *
  * Pid, original ids, data info, collection info, publisher, titles, descriptions, sources,
  * dates, subjects, authors, a single instance (license, access right, type, url) and either
  * the book source or the journal are mapped; finally the identifier is generated.
  *
  * @param result       the entity to populate (mutated in place)
  * @param json         the Crossref record
  * @param cobjCategory class id used for the instance type and the resource type qualifiers
  * @param className    class name used for the same qualifiers; when it contains "book"
  *                     (case-insensitive) the ISBN is mapped instead of the journal
  * @return `result`, or null when no identifier could be generated
  */
private def mappingResult(result: Result, json: JValue, cobjCategory: String, className: String): Result = {
  implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats

  //MAPPING Crossref DOI into PID
  val doi: String = CleaningFunctions.normalizePidValue(ModelConstants.DOI, (json \ "DOI").extract[String])
  result.setPid(
    List(
      structuredProperty(doi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES)
    ).asJava
  )

  //MAPPING Crossref DOI into OriginalId
  //and Other Original Identifier of dataset like clinical-trial-number
  val clinicalTrialNumbers: List[String] = for (JString(ctr) <- json \ "clinical-trial-number") yield ctr
  val alternativeIds: List[String] = for (JString(ids) <- json \ "alternative-id") yield ids
  val tmp = clinicalTrialNumbers ::: alternativeIds ::: List(doi)
  result.setOriginalId(tmp.filter(id => id != null).asJava)

  // Add DataInfo
  result.setDataInfo(dataInfo(false, false, 0.9f, null, false, ModelConstants.REPOSITORY_PROVENANCE_ACTIONS))
  result.setLastupdatetimestamp((json \ "indexed" \ "timestamp").extract[Long])
  result.setDateofcollection((json \ "indexed" \ "date-time").extract[String])
  result.setCollectedfrom(List(CROSSREF_COLLECTED_FROM).asJava)

  // Publisher ( Name of work's publisher mapped into Result/Publisher)
  val publisher = (json \ "publisher").extractOrElse[String](null)
  if (publisher != null && publisher.nonEmpty)
    result.setPublisher(new Publisher(publisher))

  // TITLE
  val mainTitles =
    for { JString(title) <- json \ "title" if title.nonEmpty } yield structuredProperty(
      title,
      ModelConstants.MAIN_TITLE_QUALIFIER
    )
  val originalTitles = for {
    JString(title) <- json \ "original-title" if title.nonEmpty
  } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
  val shortTitles = for {
    JString(title) <- json \ "short-title" if title.nonEmpty
  } yield structuredProperty(title, ModelConstants.ALTERNATIVE_TITLE_QUALIFIER)
  val subtitles =
    for { JString(title) <- json \ "subtitle" if title.nonEmpty } yield structuredProperty(
      title,
      ModelConstants.SUBTITLE_QUALIFIER
    )
  result.setTitle((mainTitles ::: originalTitles ::: shortTitles ::: subtitles).asJava)

  // DESCRIPTION
  val descriptionList =
    for { JString(description) <- json \ "abstract" } yield description
  result.setDescription(descriptionList.asJava)

  // Source
  val sourceList = for {
    JString(source) <- json \ "source" if source != null && source.nonEmpty
  } yield source
  result.setSource(sourceList.asJava)

  //RELEVANT DATE Mapping
  val createdDate = generateDate(
    (json \ "created" \ "date-time").extract[String],
    (json \ "created" \ "date-parts").extract[List[List[Int]]],
    "created",
    ModelConstants.DNET_DATACITE_DATE
  )
  val postedDate = generateDate(
    (json \ "posted" \ "date-time").extractOrElse[String](null),
    (json \ "posted" \ "date-parts").extract[List[List[Int]]],
    "available",
    ModelConstants.DNET_DATACITE_DATE
  )
  val acceptedDate = generateDate(
    (json \ "accepted" \ "date-time").extractOrElse[String](null),
    (json \ "accepted" \ "date-parts").extract[List[List[Int]]],
    "accepted",
    ModelConstants.DNET_DATACITE_DATE
  )
  val publishedPrintDate = generateDate(
    (json \ "published-print" \ "date-time").extractOrElse[String](null),
    (json \ "published-print" \ "date-parts").extract[List[List[Int]]],
    "published-print",
    ModelConstants.DNET_DATACITE_DATE
  )
  val publishedOnlineDate = generateDate(
    (json \ "published-online" \ "date-time").extractOrElse[String](null),
    (json \ "published-online" \ "date-parts").extract[List[List[Int]]],
    "published-online",
    ModelConstants.DNET_DATACITE_DATE
  )
  val issuedDate = extractDate(
    (json \ "issued" \ "date-time").extractOrElse[String](null),
    (json \ "issued" \ "date-parts").extract[List[List[Int]]]
  )

  // The issued date wins; the created date is the fallback. generateDate may return null,
  // so the fallback is guarded instead of dereferencing createdDate blindly.
  val dateOfAcceptance: String =
    if (StringUtils.isNotBlank(issuedDate)) issuedDate
    else if (createdDate != null) createdDate.getValue
    else null
  result.setDateofacceptance(dateOfAcceptance)

  result.setRelevantdate(
    List(createdDate, postedDate, acceptedDate, publishedOnlineDate, publishedPrintDate)
      .filter(p => p != null)
      .asJava
  )

  //Mapping Subject
  val subjectList: List[String] = (json \ "subject").extractOrElse[List[String]](List())
  if (subjectList.nonEmpty) {
    result.setSubject(
      subjectList
        .map(s =>
          OafMapperUtils.subject(
            s,
            OafMapperUtils.qualifier(
              ModelConstants.DNET_SUBJECT_KEYWORD,
              ModelConstants.DNET_SUBJECT_KEYWORD,
              ModelConstants.DNET_SUBJECT_TYPOLOGIES
            ),
            null
          )
        )
        .asJava
    )
  }

  //Mapping Author
  // NOTE(review): every generator below must match, so an author lacking any of ORCID, given,
  // family or sequence is skipped entirely - confirm that dropping them is intended.
  val authorList: List[CrossrefAuthor] =
    for {
      JObject(author)                        <- json \ "author"
      JField("ORCID", JString(orcid))        <- author
      JField("given", JString(givenName))    <- author
      JField("family", JString(familyName))  <- author
      JField("sequence", JString(sequence))  <- author
    } yield CrossrefAuthor(
      givenName = givenName,
      familyName = familyName,
      ORCID = orcid,
      sequence = sequence,
      rank = 0
    )
  // Authors flagged as "first" are placed ahead, the others are ordered by family name;
  // the rank is the resulting position.
  result.setAuthor(
    authorList
      .sortWith((a, b) => {
        if (a.sequence.equalsIgnoreCase("first"))
          true
        else if (b.sequence.equalsIgnoreCase("first"))
          false
        else a.familyName < b.familyName
      })
      .zipWithIndex
      .map(k => k._1.copy(rank = k._2))
      .map(k => generateAuthor(k))
      .asJava
  )

  // Mapping instance
  val instance = new Instance()
  val license = for {
    JObject(license)                                    <- json \ "license"
    JField("URL", JString(lic))                         <- license
    JField("content-version", JString(content_version)) <- license
  } yield (new License(lic), content_version)
  val usableLicenses = license.filter(d => StringUtils.isNotBlank(d._1.getUrl))
  // Prefer the last license whose content-version is "vor", otherwise take the first one
  usableLicenses
    .filter(d => d._2.equals("vor"))
    .lastOption
    .orElse(usableLicenses.headOption)
    .foreach(d => instance.setLicense(d._1))

  // Ticket #6281 added pid to Instance
  instance.setPid(result.getPid)

  val has_review = json \ "relation" \ "has-review" \ "id"
  if (has_review != JNothing) {
    instance.setRefereed(
      OafMapperUtils.qualifier(
        "0001",
        "peerReviewed",
        ModelConstants.DNET_REVIEW_LEVELS
      )
    )
  }

  if (instance.getLicense != null)
    instance.setAccessright(
      decideAccessRight(instance.getLicense.getUrl, result.getDateofacceptance)
    )
  instance.setInstancetype(
    OafMapperUtils.qualifier(
      cobjCategory,
      className,
      ModelConstants.DNET_PUBLICATION_RESOURCE
    )
  )
  result.setResourcetype(
    OafMapperUtils.qualifier(
      cobjCategory,
      className,
      ModelConstants.DNET_PUBLICATION_RESOURCE
    )
  )

  instance.setCollectedfrom(CROSSREF_COLLECTED_FROM)
  instance.setDateofacceptance(dateOfAcceptance)
  instance.setUrl(List("https://doi.org/" + doi).asJava)

  val containerTitles = for { JString(ct) <- json \ "container-title" } yield ct

  //Mapping book
  if (className.toLowerCase.contains("book")) {
    val ISBN = for { JString(isbn) <- json \ "ISBN" } yield isbn
    if (ISBN.nonEmpty && containerTitles.nonEmpty) {
      val source = s"${containerTitles.head} ISBN: ${ISBN.head}"
      if (result.getSource != null) {
        val extendedSources: List[String] = result.getSource.asScala.toList ::: List(source)
        result.setSource(extendedSources.asJava)
      } else
        result.setSource(List(source).asJava)
    }
  } else {
    // Mapping Journal
    val issnInfos = for {
      JObject(issn_type)          <- json \ "issn-type"
      JField("type", JString(tp)) <- issn_type
      JField("value", JString(vl)) <- issn_type
    } yield Tuple2(tp, vl)

    val volume = (json \ "volume").extractOrElse[String](null)
    if (containerTitles.nonEmpty) {
      val journal = new Journal
      journal.setName(containerTitles.head)
      issnInfos.foreach(tp => {
        tp._1 match {
          case "electronic" => journal.setIssnOnline(tp._2)
          case "print"      => journal.setIssnPrinted(tp._2)
          // any other ISSN type is ignored instead of failing with a MatchError
          case _ =>
        }
      })
      journal.setVol(volume)
      val page = (json \ "page").extractOrElse[String](null)
      if (page != null) {
        val pp = page.split("-")
        if (pp.nonEmpty)
          journal.setSp(pp.head)
        if (pp.size > 1)
          journal.setEp(pp(1))
      }
      result.setJournal(journal)
    }
  }

  result.setInstance(List(instance).asJava)

  // "ID" is a placeholder: if it is still there after the identifier generation, the
  // record has no usable identifier and null is returned.
  result.setId("ID")
  result.setId(IdentifierFactory.createIdentifier(result, true))
  if (result.getId == null || "ID".equalsIgnoreCase(result.getId))
    null
  else
    result
}
/** Derives the access right of an instance from its license URL and publication date.
  *
  *  - null or empty license: unknown access right;
  *  - Creative Commons licenses and the listed ACS / APA open licenses: open access, hybrid route;
  *  - OUP standard publication model: open access (hybrid route) when more than one year
  *    (365-day years) has passed since `date`, embargoed otherwise, closed when `date`
  *    cannot be parsed;
  *  - anything else: closed access.
  *
  * @param license license URL
  * @param date    publication date, expected as `yyyy-MM-dd` or `yyyy-MM-dd'T'HH:mm:ss'Z'`
  * @return the access right
  */
def decideAccessRight(license: String, date: String): AccessRight = {
  val oupStandardPublicationModel =
    "https://academic.oup.com/journals/pages/open_access/funder_policies/chorus/standard_publication_model"

  // Open access right flagged with the hybrid route
  def hybridOpenAccess(): AccessRight = {
    val oaq: AccessRight = ModelConstants.OPEN_ACCESS_RIGHT()
    oaq.setOpenAccessRoute(OpenAccessRoute.hybrid)
    oaq
  }

  // Parses `date` with the given pattern; None when it does not match (or date is null)
  def parseDate(pattern: String): Option[LocalDate] =
    try Some(LocalDate.parse(date, DateTimeFormatter.ofPattern(pattern)))
    catch { case _: Exception => None }

  if (license == null || license.isEmpty) {
    //Default value Unknown
    ModelConstants.UNKNOWN_ACCESS_RIGHT()
  } else if (
    //CC licenses
    license.startsWith("cc") ||
    license.startsWith("http://creativecommons.org/licenses") ||
    license.startsWith("https://creativecommons.org/licenses") ||
    //ACS Publications Author choice licenses (considered OPEN also by Unpaywall)
    license.equals("http://pubs.acs.org/page/policy/authorchoice_ccby_termsofuse.html") ||
    license.equals("http://pubs.acs.org/page/policy/authorchoice_termsofuse.html") ||
    license.equals("http://pubs.acs.org/page/policy/authorchoice_ccbyncnd_termsofuse.html") ||
    //APA (considered OPEN also by Unpaywall)
    license.equals("http://www.apa.org/pubs/journals/resources/open-access.aspx")
  ) {
    hybridOpenAccess()
  } else if (license.equals(oupStandardPublicationModel)) {
    //OUP (BUT ONLY AFTER 12 MONTHS FROM THE PUBLICATION DATE, OTHERWISE THEY ARE EMBARGOED)
    val now = java.time.LocalDate.now
    parseDate("yyyy-MM-dd").orElse(parseDate("yyyy-MM-dd'T'HH:mm:ss'Z'")) match {
      case Some(pubDate) =>
        if (((now.toEpochDay - pubDate.toEpochDay) / 365.0) > 1) hybridOpenAccess()
        else ModelConstants.EMBARGOED_ACCESS_RIGHT()
      case None =>
        ModelConstants.CLOSED_ACCESS_RIGHT()
    }
  } else {
    ModelConstants.CLOSED_ACCESS_RIGHT()
  }
}
/** Produces a cleaned date string from a Crossref date element.
  *
  * The textual `dt` is used when it is not blank. Otherwise, when `datePart` holds exactly
  * one entry, the date is rebuilt from its year, year-month or year-month-day components,
  * with missing month and day defaulting to 01.
  *
  * @param dt       the "date-time" value, may be null or blank
  * @param datePart the "date-parts" value, may be null
  * @return the value returned by `GraphCleaningFunctions.cleanDate`, or null when no date
  *         can be derived
  */
private def extractDate(dt: String, datePart: List[List[Int]]): String =
  if (StringUtils.isNotBlank(dt))
    GraphCleaningFunctions.cleanDate(dt)
  else
    datePart match {
      case List(List(year, month, day)) =>
        val candidate = f"$year-$month%02d-$day%02d"
        // only a 10 character yyyy-MM-dd string is accepted
        if (candidate.length == 10) GraphCleaningFunctions.cleanDate(candidate) else null
      case List(List(year, month)) =>
        GraphCleaningFunctions.cleanDate(f"$year-$month%02d-01")
      case List(List(year)) =>
        GraphCleaningFunctions.cleanDate(s"$year-01-01")
      case _ =>
        null
    }
/** Wraps the date extracted by `extractDate` into a structured property.
  *
  * @param dt       the "date-time" value, may be null or blank
  * @param datePart the "date-parts" value
  * @param classId  value used both as class id and class name of the qualifier
  * @param schemeId scheme of the qualifier
  * @return the structured property, or null when the extracted date is blank
  */
private def generateDate(
  dt: String,
  datePart: List[List[Int]],
  classId: String,
  schemeId: String
): StructuredProperty =
  Option(extractDate(dt, datePart))
    .filter(value => StringUtils.isNotBlank(value))
    .map(value => structuredProperty(value, classId, classId, schemeId))
    .orNull
/** Instantiates the result entity matching a Crossref object type.
  *
  * The type is first resolved against the publication resource vocabulary, then the class id
  * of that term is resolved against the result typologies vocabulary to choose the entity.
  *
  * @param objectType   the Crossref "type" value
  * @param vocabularies vocabularies used for both lookups
  * @return (new empty entity, result typology name, class name of the resource term), or
  *         null when the type cannot be resolved to one of the supported typologies
  */
private def generateItemFromType(objectType: String, vocabularies: VocabularyGroup): (Result, String, String) = {
  val term = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_PUBLICATION_RESOURCE, objectType)
  if (term == null)
    null
  else {
    val typology = vocabularies.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, term.getClassid)
    // an unresolved typology takes the same null path as an unresolved term
    if (typology == null)
      null
    else {
      val resourceType = typology.getClassname
      resourceType match {
        case "publication"          => (new Publication, resourceType, term.getClassname)
        case "dataset"              => (new Dataset, resourceType, term.getClassname)
        case "software"             => (new Software, resourceType, term.getClassname)
        case "otherresearchproduct" => (new OtherResearchProduct, resourceType, term.getClassname)
        // unsupported typology: report "no mapping" instead of failing with a MatchError
        case _ => null
      }
    }
  }
}
/** Converts a parsed Crossref author into an OAF author.
  *
  * The full name is "family, given", the rank is the author's rank plus one and, when the
  * ORCID is not blank, it is attached as a pid qualified with `ModelConstants.ORCID_PENDING`.
  *
  * @param ca the Crossref author
  * @return the OAF author
  */
private def generateAuthor(ca: CrossrefAuthor): Author = {
  val author = new Author
  author.setName(ca.givenName)
  author.setSurname(ca.familyName)
  author.setFullname(s"${ca.familyName}, ${ca.givenName}")
  author.setRank(ca.rank + 1)
  if (StringUtils.isNotBlank(ca.ORCID)) {
    val orcidPending = OafMapperUtils.qualifier(
      ModelConstants.ORCID_PENDING,
      ModelConstants.ORCID_PENDING,
      ModelConstants.DNET_PID_TYPES
    )
    val orcidPid = OafMapperUtils.authorPid(ca.ORCID, orcidPending, null)
    author.setPid(List(orcidPid).asJava)
  }
  author
}
}

View File

@ -0,0 +1,22 @@
package eu.dnetlib.dhp.crossref
import eu.dnetlib.dhp.application.AbstractScalaApplication
import org.slf4j.{Logger, LoggerFactory}
/** Application that is meant to generate the Crossref dataset.
  *
  * All constructor arguments are forwarded unchanged to `AbstractScalaApplication`.
  *
  * @param propertyPath location of the application parameter description
  * @param args         command line arguments
  * @param log          logger handed to the base application
  */
class GenerateCrossrefDataset(propertyPath: String, args: Array[String], log: Logger)
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
/** Every Spark application runs this method, which is where the whole logic
 * of the Spark node is defined.
 * Not implemented yet: `???` throws `scala.NotImplementedError` when invoked.
 */
override def run(): Unit = ???
}
/** Launcher of the `GenerateCrossrefDataset` application. */
object GenerateCrossrefDataset {
val log: Logger = LoggerFactory.getLogger(getClass)
// Path handed to the application as its parameter description
// (presumably a classpath resource - verify against AbstractScalaApplication)
val propertyPath = "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json"
// Builds the application, calls initialize() on it and then run()
def main(args: Array[String]): Unit = {
new GenerateCrossrefDataset(propertyPath, args, log).initialize().run()
}
}

View File

@ -2,7 +2,7 @@ package eu.dnetlib.dhp.datacite
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils
import eu.dnetlib.dhp.schema.oaf.{DataInfo, KeyValue} import eu.dnetlib.dhp.schema.oaf.{DataInfo, EntityDataInfo, KeyValue, Relation}
import java.io.InputStream import java.io.InputStream
import java.time.format.DateTimeFormatter import java.time.format.DateTimeFormatter
@ -66,148 +66,32 @@ class DataciteModelConstants extends Serializable {}
object DataciteModelConstants { object DataciteModelConstants {
val REL_TYPE_VALUE: String = "resultResult" val REL_TYPE_VALUE =Relation.RELTYPE.resultResult
val DATE_RELATION_KEY = "RelationDate" val DATE_RELATION_KEY = "RelationDate"
val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter" val DATACITE_FILTER_PATH = "/eu/dnetlib/dhp/datacite/datacite_filter"
val DOI_CLASS = "doi"
val SUBJ_CLASS = "keywords" val SUBJ_CLASS = "keywords"
val DATACITE_NAME = "Datacite" val DATACITE_NAME = "Datacite"
val dataInfo: DataInfo = dataciteDataInfo("0.9") val PMID = "pmid"
val ARXIV = "arxiv"
val dataInfo: EntityDataInfo = dataciteDataInfo(0.9f)
val relDataInfo = OafMapperUtils.fromEntityDataInfo(dataInfo);
val DATACITE_COLLECTED_FROM: KeyValue = val DATACITE_COLLECTED_FROM: KeyValue =
OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME) OafMapperUtils.keyValue(ModelConstants.DATACITE_ID, DATACITE_NAME)
val subRelTypeMapping: Map[String, OAFRelations] = Map(
ModelConstants.IS_SUPPLEMENTED_BY -> OAFRelations(
ModelConstants.IS_SUPPLEMENTED_BY,
ModelConstants.IS_SUPPLEMENT_TO,
ModelConstants.SUPPLEMENT
),
ModelConstants.IS_SUPPLEMENT_TO -> OAFRelations(
ModelConstants.IS_SUPPLEMENT_TO,
ModelConstants.IS_SUPPLEMENTED_BY,
ModelConstants.SUPPLEMENT
),
ModelConstants.HAS_PART -> OAFRelations(
ModelConstants.HAS_PART,
ModelConstants.IS_PART_OF,
ModelConstants.PART
),
ModelConstants.IS_PART_OF -> OAFRelations(
ModelConstants.IS_PART_OF,
ModelConstants.HAS_PART,
ModelConstants.PART
),
ModelConstants.IS_VERSION_OF -> OAFRelations(
ModelConstants.IS_VERSION_OF,
ModelConstants.HAS_VERSION,
ModelConstants.VERSION
),
ModelConstants.HAS_VERSION -> OAFRelations(
ModelConstants.HAS_VERSION,
ModelConstants.IS_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.IS_IDENTICAL_TO -> OAFRelations(
ModelConstants.IS_IDENTICAL_TO,
ModelConstants.IS_IDENTICAL_TO,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_CONTINUED_BY -> OAFRelations(
ModelConstants.IS_CONTINUED_BY,
ModelConstants.CONTINUES,
ModelConstants.RELATIONSHIP
),
ModelConstants.CONTINUES -> OAFRelations(
ModelConstants.CONTINUES,
ModelConstants.IS_CONTINUED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_NEW_VERSION_OF -> OAFRelations(
ModelConstants.IS_NEW_VERSION_OF,
ModelConstants.IS_PREVIOUS_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.IS_PREVIOUS_VERSION_OF -> OAFRelations(
ModelConstants.IS_PREVIOUS_VERSION_OF,
ModelConstants.IS_NEW_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.DOCUMENTS,
ModelConstants.RELATIONSHIP
),
ModelConstants.DOCUMENTS -> OAFRelations(
ModelConstants.DOCUMENTS,
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_SOURCE_OF -> OAFRelations(
ModelConstants.IS_SOURCE_OF,
ModelConstants.IS_DERIVED_FROM,
ModelConstants.VERSION
),
ModelConstants.IS_DERIVED_FROM -> OAFRelations(
ModelConstants.IS_DERIVED_FROM,
ModelConstants.IS_SOURCE_OF,
ModelConstants.VERSION
),
ModelConstants.IS_VARIANT_FORM_OF -> OAFRelations(
ModelConstants.IS_VARIANT_FORM_OF,
ModelConstants.IS_DERIVED_FROM,
ModelConstants.VERSION
),
ModelConstants.IS_OBSOLETED_BY -> OAFRelations(
ModelConstants.IS_OBSOLETED_BY,
ModelConstants.IS_NEW_VERSION_OF,
ModelConstants.VERSION
),
ModelConstants.REVIEWS -> OAFRelations(
ModelConstants.REVIEWS,
ModelConstants.IS_REVIEWED_BY,
ModelConstants.REVIEW
),
ModelConstants.IS_REVIEWED_BY -> OAFRelations(
ModelConstants.IS_REVIEWED_BY,
ModelConstants.REVIEWS,
ModelConstants.REVIEW
),
ModelConstants.DOCUMENTS -> OAFRelations(
ModelConstants.DOCUMENTS,
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_DOCUMENTED_BY -> OAFRelations(
ModelConstants.IS_DOCUMENTED_BY,
ModelConstants.DOCUMENTS,
ModelConstants.RELATIONSHIP
),
ModelConstants.COMPILES -> OAFRelations(
ModelConstants.COMPILES,
ModelConstants.IS_COMPILED_BY,
ModelConstants.RELATIONSHIP
),
ModelConstants.IS_COMPILED_BY -> OAFRelations(
ModelConstants.IS_COMPILED_BY,
ModelConstants.COMPILES,
ModelConstants.RELATIONSHIP
)
)
val datacite_filter: List[String] = { val datacite_filter: List[String] = {
val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH) val stream: InputStream = getClass.getResourceAsStream(DATACITE_FILTER_PATH)
require(stream != null) require(stream != null)
Source.fromInputStream(stream).getLines().toList Source.fromInputStream(stream).getLines().toList
} }
def dataciteDataInfo(trust: String): DataInfo = OafMapperUtils.dataInfo( def dataciteDataInfo(trust: Float): EntityDataInfo = OafMapperUtils.dataInfo(
false, false,
false,
trust,
null, null,
false, false,
false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER,
trust
) )
val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern( val df_en: DateTimeFormatter = DateTimeFormatter.ofPattern(
@ -255,4 +139,8 @@ object DataciteModelConstants {
Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE) Pattern.compile("(19|20)\\d\\d", Pattern.MULTILINE)
) )
def validIdentifiersInRelation(relatedIdentifierType:String):Boolean = {
relatedIdentifierType.equalsIgnoreCase(ModelConstants.DOI) || relatedIdentifierType.equalsIgnoreCase(PMID) ||
relatedIdentifierType.equalsIgnoreCase(ARXIV)
}
} }

View File

@ -1,6 +1,7 @@
package eu.dnetlib.dhp.datacite package eu.dnetlib.dhp.datacite
import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.ObjectMapper
import com.google.common.collect.Lists
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup
import eu.dnetlib.dhp.datacite.DataciteModelConstants._ import eu.dnetlib.dhp.datacite.DataciteModelConstants._
import eu.dnetlib.dhp.schema.action.AtomicAction import eu.dnetlib.dhp.schema.action.AtomicAction
@ -99,7 +100,7 @@ object DataciteToOAFTransformation {
} }
/** This utility method indicates whether the embargo date has been reached /** This utility method indicates whether the embargo date has been reached
* @param embargo_end_date * @param embargo_end_date the end date of embargo
* @return True if the embargo date has been reached, false otherwise * @return True if the embargo date has been reached, false otherwise
*/ */
def embargo_end(embargo_end_date: String): Boolean = { def embargo_end(embargo_end_date: String): Boolean = {
@ -278,33 +279,26 @@ object DataciteToOAFTransformation {
} }
def createDNetTargetIdentifier(pid: String, pidType: String, idPrefix: String): String = {
val f_part = s"$idPrefix|${pidType.toLowerCase}".padTo(15, '_')
s"$f_part::${IdentifierFactory.md5(pid.toLowerCase)}"
}
def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = { def generateOAFDate(dt: String, q: Qualifier): StructuredProperty = {
OafMapperUtils.structuredProperty(dt, q, null) OafMapperUtils.structuredProperty(dt, q)
} }
def generateRelation( def generateRelation(
sourceId: String, sourceId: String,
targetId: String, targetId: String,
relClass: String, relClass: Relation.RELCLASS,
cf: KeyValue, collectedFrom: KeyValue,
di: DataInfo di: DataInfo
): Relation = { ): Relation = {
val r = new Relation val r = new Relation
r.setSource(sourceId) r.setSource(sourceId)
r.setTarget(targetId) r.setTarget(targetId)
r.setRelType(ModelConstants.RESULT_PROJECT) r.setRelType(Relation.RELTYPE.resultProject)
r.setRelClass(relClass) r.setRelClass(relClass)
r.setSubRelType(ModelConstants.OUTCOME) r.setSubRelType(Relation.SUBRELTYPE.outcome)
r.setCollectedfrom(List(cf).asJava) r.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(collectedFrom, di)))
r.setDataInfo(di)
r r
} }
def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = { def get_projectRelation(awardUri: String, sourceId: String): List[Relation] = {
@ -315,7 +309,7 @@ object DataciteToOAFTransformation {
val p = match_pattern.get._2 val p = match_pattern.get._2
val grantId = m.matcher(awardUri).replaceAll("$2") val grantId = m.matcher(awardUri).replaceAll("$2")
val targetId = s"$p${DHPUtils.md5(grantId)}" val targetId = s"$p${DHPUtils.md5(grantId)}"
List(generateRelation(sourceId, targetId, "isProducedBy", DATACITE_COLLECTED_FROM, dataInfo)) List(generateRelation(sourceId, targetId, Relation.RELCLASS.isProducedBy, DATACITE_COLLECTED_FROM, relDataInfo))
} else } else
List() List()
@ -351,16 +345,15 @@ object DataciteToOAFTransformation {
// DOI is mapped on a PID inside a Instance object // DOI is mapped on a PID inside a Instance object
val doi_q = OafMapperUtils.qualifier( val doi_q = OafMapperUtils.qualifier(
"doi", ModelConstants.DOI,
"doi", ModelConstants.DOI,
ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES ModelConstants.DNET_PID_TYPES
) )
val pid = OafMapperUtils.structuredProperty(doi, doi_q, dataInfo) val pid = OafMapperUtils.structuredProperty(doi, doi_q)
result.setPid(List(pid).asJava) result.setPid(List(pid).asJava)
// This identifiere will be replaced in a second moment using the PID logic generation // This identifiere will be replaced in a second moment using the PID logic generation
result.setId(OafMapperUtils.createOpenaireId(50, s"datacite____::$doi", true)) result.setId(IdentifierFactory.createOpenaireId(50, s"datacite____::$doi", true))
result.setOriginalId(List(doi).asJava) result.setOriginalId(List(doi).asJava)
val d = new Date(dateOfCollection * 1000) val d = new Date(dateOfCollection * 1000)
@ -389,7 +382,7 @@ object DataciteToOAFTransformation {
) )
else null else null
if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) { if (ni.nameIdentifier != null && ni.nameIdentifier.isDefined) {
OafMapperUtils.structuredProperty(ni.nameIdentifier.get, q, dataInfo) OafMapperUtils.authorPid(ni.nameIdentifier.get, q, relDataInfo)
} else } else
null null
@ -397,13 +390,6 @@ object DataciteToOAFTransformation {
.asJava .asJava
) )
} }
if (c.affiliation.isDefined)
a.setAffiliation(
c.affiliation.get
.filter(af => af.nonEmpty)
.map(af => OafMapperUtils.field(af, dataInfo))
.asJava
)
a.setRank(idx + 1) a.setRank(idx + 1)
a a
} }
@ -420,15 +406,13 @@ object DataciteToOAFTransformation {
.map(t => { .map(t => {
if (t.titleType.isEmpty) { if (t.titleType.isEmpty) {
OafMapperUtils OafMapperUtils
.structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER, null) .structuredProperty(t.title.get, ModelConstants.MAIN_TITLE_QUALIFIER)
} else { } else {
OafMapperUtils.structuredProperty( OafMapperUtils.structuredProperty(
t.title.get, t.title.get,
t.titleType.get, t.titleType.get,
t.titleType.get, t.titleType.get,
ModelConstants.DNET_DATACITE_TITLE, ModelConstants.DNET_DATACITE_TITLE
ModelConstants.DNET_DATACITE_TITLE,
null
) )
} }
}) })
@ -449,46 +433,40 @@ object DataciteToOAFTransformation {
.map(d => d.get) .map(d => d.get)
if (a_date.isDefined) { if (a_date.isDefined) {
if (doi.startsWith("10.14457")) if (doi.startsWith("10.14457")) {
result.setEmbargoenddate( val date = fix_thai_date(a_date.get, "[yyyy-MM-dd]")
OafMapperUtils.field(fix_thai_date(a_date.get, "[yyyy-MM-dd]"), null) result.setEmbargoenddate(date)
) } else {
else result.setEmbargoenddate(a_date.get)
result.setEmbargoenddate(OafMapperUtils.field(a_date.get, null)) }
} }
if (i_date.isDefined && i_date.get.isDefined) { if (i_date.isDefined && i_date.get.isDefined) {
if (doi.startsWith("10.14457")) { if (doi.startsWith("10.14457")) {
result.setDateofacceptance( val date = fix_thai_date(i_date.get.get, "[yyyy-MM-dd]")
OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null) result.setDateofacceptance(date)
)
result result
.getInstance() .getInstance()
.get(0) .get(0)
.setDateofacceptance( .setDateofacceptance(date)
OafMapperUtils.field(fix_thai_date(i_date.get.get, "[yyyy-MM-dd]"), null)
)
} else { } else {
result.setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) result.setDateofacceptance(i_date.get.get)
result.getInstance().get(0).setDateofacceptance(OafMapperUtils.field(i_date.get.get, null)) result.getInstance().get(0).setDateofacceptance(i_date.get.get)
} }
} else if (publication_year != null) { } else if (publication_year != null) {
val date = s"01-01-$publication_year"
if (doi.startsWith("10.14457")) { if (doi.startsWith("10.14457")) {
result.setDateofacceptance( val fdate = fix_thai_date(date, "[dd-MM-yyyy]")
OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null) result.setDateofacceptance(fdate)
)
result result
.getInstance() .getInstance()
.get(0) .get(0)
.setDateofacceptance( .setDateofacceptance(fdate)
OafMapperUtils.field(fix_thai_date(s"01-01-$publication_year", "[dd-MM-yyyy]"), null)
)
} else { } else {
result.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) result.setDateofacceptance(date)
result result
.getInstance() .getInstance()
.get(0) .get(0)
.setDateofacceptance(OafMapperUtils.field(s"01-01-$publication_year", null)) .setDateofacceptance(date)
} }
} }
@ -519,8 +497,7 @@ object DataciteToOAFTransformation {
SUBJ_CLASS, SUBJ_CLASS,
SUBJ_CLASS, SUBJ_CLASS,
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES, relDataInfo
null
) )
) )
.asJava .asJava
@ -533,14 +510,14 @@ object DataciteToOAFTransformation {
result.setDescription( result.setDescription(
descriptions descriptions
.filter(d => d.description.isDefined) .filter(d => d.description.isDefined)
.map(d => OafMapperUtils.field(d.description.get, null)) .map(d => d.description.get)
.filter(s => s != null) .filter(s => s != null)
.asJava .asJava
) )
val publisher = (json \\ "publisher").extractOrElse[String](null) val publisher = (json \\ "publisher").extractOrElse[String](null)
if (publisher != null) if (publisher != null)
result.setPublisher(OafMapperUtils.field(publisher, null)) result.setPublisher(OafMapperUtils.publisher(publisher))
val language: String = (json \\ "language").extractOrElse[String](null) val language: String = (json \\ "language").extractOrElse[String](null)
@ -568,7 +545,6 @@ object DataciteToOAFTransformation {
a.setClassid(q.getClassid) a.setClassid(q.getClassid)
a.setClassname(q.getClassname) a.setClassname(q.getClassname)
a.setSchemeid(q.getSchemeid) a.setSchemeid(q.getSchemeid)
a.setSchemename(q.getSchemename)
a a
}) })
@ -578,7 +554,6 @@ object DataciteToOAFTransformation {
OafMapperUtils.accessRight( OafMapperUtils.accessRight(
ModelConstants.UNKNOWN, ModelConstants.UNKNOWN,
ModelConstants.NOT_AVAILABLE, ModelConstants.NOT_AVAILABLE,
ModelConstants.DNET_ACCESS_MODES,
ModelConstants.DNET_ACCESS_MODES ModelConstants.DNET_ACCESS_MODES
) )
@ -598,7 +573,7 @@ object DataciteToOAFTransformation {
) )
) )
if (license.isDefined) if (license.isDefined)
instance.setLicense(OafMapperUtils.field(license.get, null)) instance.setLicense(OafMapperUtils.license(license.get))
} }
val awardUris: List[String] = for { val awardUris: List[String] = for {
@ -640,48 +615,54 @@ object DataciteToOAFTransformation {
List(result) List(result)
} }
//TODO @CLAUDIO we need to define in which direction each relation is generated
/**
* This function generates unresolved relations from the original Datacite document
* @param rels the related identifier section of the document
* @param id the source record identifier
* @param date the date of collection
* @return a List of OAF relations
*/
private def generateRelations( private def generateRelations(
rels: List[RelatedIdentifierType], rels: List[RelatedIdentifierType],
id: String, id: String,
date: String date: String
): List[Relation] = { ): List[Relation] = {
// TODO We need to check how to generate relations:
// in the previous implementation we created all bidirectional relations
// related to a DOI pid or arXiv.
val bidirectionalRels: List[Relation] = rels val bidirectionalRels: List[Relation] = rels
.filter(r => .filter(r =>
subRelTypeMapping Relation.RELCLASS.exists(r.relationType) && validIdentifiersInRelation(r.relatedIdentifierType)
.contains(r.relationType) && (r.relatedIdentifierType.equalsIgnoreCase("doi") ||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
r.relatedIdentifierType.equalsIgnoreCase("arxiv"))
) )
.map(r => { .map(r => {
val subRelType = subRelTypeMapping(r.relationType).relType val rc = Relation.RELCLASS.valueOf(r.relationType)
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, subRelType, r.relationType, date) relation(id, target, rc.getSubRel, rc, date)
}) })
val citationRels: List[Relation] = rels val citationRels: List[Relation] = rels
.filter(r => .filter(r =>validIdentifiersInRelation(r.relatedIdentifierType) &&
(r.relatedIdentifierType.equalsIgnoreCase("doi") ||
r.relatedIdentifierType.equalsIgnoreCase("pmid") ||
r.relatedIdentifierType.equalsIgnoreCase("arxiv")) &&
(r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference")) (r.relationType.toLowerCase.contains("cite") || r.relationType.toLowerCase.contains("reference"))
) )
.map(r => { .map(r => {
r.relationType match { Relation.RELCLASS.valueOf(r.relationType) match {
case ModelConstants.CITES | ModelConstants.REFERENCES => case Relation.RELCLASS.Cites | Relation.RELCLASS.References =>
val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) val target = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(id, target, ModelConstants.CITATION, ModelConstants.CITES, date) relation(id, target, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date)
case ModelConstants.IS_CITED_BY | ModelConstants.IS_REFERENCED_BY => case Relation.RELCLASS.IsCitedBy | Relation.RELCLASS.IsReferencedBy =>
val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType) val source = DHPUtils.generateUnresolvedIdentifier(r.relatedIdentifier, r.relatedIdentifierType)
relation(source, id, ModelConstants.CITATION, ModelConstants.CITES, date) relation(source, id, Relation.SUBRELTYPE.citation, Relation.RELCLASS.Cites, date)
} }
}) })
citationRels ::: bidirectionalRels citationRels ::: bidirectionalRels
} }
def relation(source: String, target: String, subRelType: String, relClass: String, date: String): Relation = { def relation(source: String, target: String, subRelType: Relation.SUBRELTYPE, relClass: Relation.RELCLASS, date: String): Relation = {
val rel = new Relation val rel = new Relation
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava) rel.setProvenance(Lists.newArrayList(OafMapperUtils.getProvenance(DATACITE_COLLECTED_FROM, relDataInfo)))
rel.setDataInfo(dataInfo)
rel.setRelType(REL_TYPE_VALUE) rel.setRelType(REL_TYPE_VALUE)
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
@ -693,8 +674,6 @@ object DataciteToOAFTransformation {
rel.setSource(source) rel.setSource(source)
rel.setTarget(target) rel.setTarget(target)
rel.setCollectedfrom(List(DATACITE_COLLECTED_FROM).asJava)
rel.getCollectedfrom.asScala.map(c => c.getValue).toList
rel rel
} }

View File

@ -1,12 +1,14 @@
package eu.dnetlib.dhp.sx.bio package eu.dnetlib.dhp.sx.bio
import com.google.common.collect.Lists
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf._ import eu.dnetlib.dhp.schema.oaf._
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils}
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.jackson.JsonMethods.{compact, parse, render}
import collection.JavaConverters._
import scala.collection.JavaConverters._
object BioDBToOAF { object BioDBToOAF {
@ -34,13 +36,20 @@ object BioDBToOAF {
authors: List[String] authors: List[String]
) {} ) {}
val DATA_INFO: DataInfo = OafMapperUtils.dataInfo( val REL_DATA_INFO: DataInfo = OafMapperUtils.dataInfo(
false, 0.9f,
null, null,
false, false,
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER
)
val DATA_INFO: EntityDataInfo = OafMapperUtils.dataInfo(
false, false,
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, false,
"0.9" 0.9f,
null,
false,
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER
) )
val SUBJ_CLASS = "Keywords" val SUBJ_CLASS = "Keywords"
@ -88,15 +97,6 @@ object BioDBToOAF {
val pubmedCollectedFrom: KeyValue = val pubmedCollectedFrom: KeyValue =
OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")
UNIPROTCollectedFrom.setDataInfo(DATA_INFO)
PDBCollectedFrom.setDataInfo(DATA_INFO)
ElsevierCollectedFrom.setDataInfo(DATA_INFO)
EBICollectedFrom.setDataInfo(DATA_INFO)
pubmedCollectedFrom.setDataInfo(DATA_INFO)
enaCollectedFrom.setDataInfo(DATA_INFO)
ncbiCollectedFrom.setDataInfo(DATA_INFO)
springerNatureCollectedFrom.setDataInfo(DATA_INFO)
Map( Map(
"uniprot" -> UNIPROTCollectedFrom, "uniprot" -> UNIPROTCollectedFrom,
"pdb" -> PDBCollectedFrom, "pdb" -> PDBCollectedFrom,
@ -127,8 +127,8 @@ object BioDBToOAF {
target_pid_type, target_pid_type,
generate_unresolved_id(source_pid, source_pid_type), generate_unresolved_id(source_pid, source_pid_type),
collectedFromMap("elsevier"), collectedFromMap("elsevier"),
"relationship", Relation.SUBRELTYPE.relationship,
relation_semantic, Relation.RELCLASS.lookUp(relation_semantic),
date date
) )
@ -144,9 +144,7 @@ object BioDBToOAF {
input.pid.toLowerCase, input.pid.toLowerCase,
input.pidType.toLowerCase, input.pidType.toLowerCase,
input.pidType.toLowerCase, input.pidType.toLowerCase,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
DATA_INFO
) )
).asJava ).asJava
) )
@ -154,15 +152,14 @@ object BioDBToOAF {
d.setDataInfo(DATA_INFO) d.setDataInfo(DATA_INFO)
val nsPrefix = input.pidType.toLowerCase.padTo(12, '_') val nsPrefix = input.pidType.toLowerCase.padTo(12, '_')
d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.pid.toLowerCase}", true)) d.setId(IdentifierFactory.createOpenaireId(50, s"$nsPrefix::${input.pid.toLowerCase}", true))
if (input.tilte != null && input.tilte.nonEmpty) if (input.tilte != null && input.tilte.nonEmpty)
d.setTitle( d.setTitle(
List( List(
OafMapperUtils.structuredProperty( OafMapperUtils.structuredProperty(
input.tilte.head, input.tilte.head,
ModelConstants.MAIN_TITLE_QUALIFIER, ModelConstants.MAIN_TITLE_QUALIFIER
DATA_INFO
) )
).asJava ).asJava
) )
@ -181,7 +178,6 @@ object BioDBToOAF {
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0037", "0037",
"Clinical Trial", "Clinical Trial",
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
@ -190,7 +186,6 @@ object BioDBToOAF {
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
"Bioentity", "Bioentity",
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
@ -213,8 +208,8 @@ object BioDBToOAF {
} }
if (input.date != null && input.date.nonEmpty) { if (input.date != null && input.date.nonEmpty) {
val dt = input.date.head val dt = input.date.head
i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) i.setDateofacceptance(GraphCleaningFunctions.cleanDate(dt))
d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(dt), DATA_INFO)) d.setDateofacceptance(GraphCleaningFunctions.cleanDate(dt))
} }
d d
} }
@ -232,15 +227,13 @@ object BioDBToOAF {
pid, pid,
"uniprot", "uniprot",
"uniprot", "uniprot",
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
DATA_INFO
) )
).asJava ).asJava
) )
d.setDataInfo(DATA_INFO) d.setDataInfo(DATA_INFO)
d.setId(OafMapperUtils.createOpenaireId(50, s"uniprot_____::$pid", true)) d.setId(IdentifierFactory.createOpenaireId(50, s"uniprot_____::$pid", true))
d.setCollectedfrom(List(collectedFromMap("uniprot")).asJava) d.setCollectedfrom(List(collectedFromMap("uniprot")).asJava)
val title: String = (json \ "title").extractOrElse[String](null) val title: String = (json \ "title").extractOrElse[String](null)
@ -248,7 +241,7 @@ object BioDBToOAF {
if (title != null) if (title != null)
d.setTitle( d.setTitle(
List( List(
OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO) OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER)
).asJava ).asJava
) )
@ -261,7 +254,6 @@ object BioDBToOAF {
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
"Bioentity", "Bioentity",
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
@ -286,7 +278,6 @@ object BioDBToOAF {
SUBJ_CLASS, SUBJ_CLASS,
SUBJ_CLASS, SUBJ_CLASS,
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
null null
) )
) )
@ -298,8 +289,8 @@ object BioDBToOAF {
if (dates.nonEmpty) { if (dates.nonEmpty) {
i_date = dates.find(d => d.date_info.contains("entry version")) i_date = dates.find(d => d.date_info.contains("entry version"))
if (i_date.isDefined) { if (i_date.isDefined) {
i.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) i.setDateofacceptance(i_date.get.date)
d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) d.setDateofacceptance(i_date.get.date)
} }
val relevant_dates: List[StructuredProperty] = dates val relevant_dates: List[StructuredProperty] = dates
.filter(d => !d.date_info.contains("entry version")) .filter(d => !d.date_info.contains("entry version"))
@ -308,14 +299,12 @@ object BioDBToOAF {
date.date, date.date,
ModelConstants.UNKNOWN, ModelConstants.UNKNOWN,
ModelConstants.UNKNOWN, ModelConstants.UNKNOWN,
ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE
ModelConstants.DNET_DATACITE_DATE,
DATA_INFO
) )
) )
if (relevant_dates != null && relevant_dates.nonEmpty) if (relevant_dates != null && relevant_dates.nonEmpty)
d.setRelevantdate(relevant_dates.asJava) d.setRelevantdate(relevant_dates.asJava)
d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) d.setDateofacceptance(i_date.get.date)
} }
val references_pmid: List[String] = for { val references_pmid: List[String] = for {
@ -334,11 +323,11 @@ object BioDBToOAF {
"pmid", "pmid",
d.getId, d.getId,
collectedFromMap("uniprot"), collectedFromMap("uniprot"),
ModelConstants.RELATIONSHIP, Relation.SUBRELTYPE.relationship,
ModelConstants.IS_RELATED_TO, Relation.RELCLASS.IsRelatedTo,
if (i_date.isDefined) i_date.get.date else null if (i_date.isDefined) i_date.get.date else null
) )
rel.getCollectedfrom rel.getProvenance.asScala.map(p => p.getCollectedfrom)
List(d, rel) List(d, rel)
} else if (references_doi != null && references_doi.nonEmpty) { } else if (references_doi != null && references_doi.nonEmpty) {
val rel = createRelation( val rel = createRelation(
@ -346,8 +335,8 @@ object BioDBToOAF {
"doi", "doi",
d.getId, d.getId,
collectedFromMap("uniprot"), collectedFromMap("uniprot"),
ModelConstants.RELATIONSHIP, Relation.SUBRELTYPE.relationship,
ModelConstants.IS_RELATED_TO, Relation.RELCLASS.IsRelatedTo,
if (i_date.isDefined) i_date.get.date else null if (i_date.isDefined) i_date.get.date else null
) )
List(d, rel) List(d, rel)
@ -364,16 +353,24 @@ object BioDBToOAF {
pidType: String, pidType: String,
sourceId: String, sourceId: String,
collectedFrom: KeyValue, collectedFrom: KeyValue,
subRelType: String, subRelType: Relation.SUBRELTYPE,
relClass: String, relClass: Relation.RELCLASS,
date: String date: String
): Relation = { ): Relation = {
val rel = new Relation val rel = new Relation
rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava)
rel.setDataInfo(DATA_INFO)
rel.setRelType(ModelConstants.RESULT_RESULT) val provenance = OafMapperUtils.getProvenance(
Lists.newArrayList(
collectedFrom,
collectedFromMap("pdb")
),
REL_DATA_INFO
)
rel.setProvenance(provenance)
rel.setRelType(Relation.RELTYPE.resultResult)
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
rel.setRelClass(relClass) rel.setRelClass(relClass)
@ -383,9 +380,8 @@ object BioDBToOAF {
val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date) val dateProps: KeyValue = OafMapperUtils.keyValue(DATE_RELATION_KEY, date)
rel.setProperties(List(dateProps).asJava) rel.setProperties(List(dateProps).asJava)
rel.getTarget.startsWith("unresolved") rel.getTarget.startsWith("unresolved")
rel.setCollectedfrom(List(collectedFrom).asJava)
rel rel
} }
@ -402,10 +398,11 @@ object BioDBToOAF {
pidType, pidType,
sourceId, sourceId,
collectedFrom, collectedFrom,
ModelConstants.SUPPLEMENT, Relation.SUBRELTYPE.supplement,
ModelConstants.IS_SUPPLEMENT_TO, Relation.RELCLASS.IsSupplementTo,
date date
) )
} }
def pdbTOOaf(input: String): List[Oaf] = { def pdbTOOaf(input: String): List[Oaf] = {
@ -424,16 +421,14 @@ object BioDBToOAF {
pdb, pdb,
"pdb", "pdb",
"Protein Data Bank Identifier", "Protein Data Bank Identifier",
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
DATA_INFO
) )
).asJava ).asJava
) )
d.setCollectedfrom(List(collectedFromMap("pdb")).asJava) d.setCollectedfrom(List(collectedFromMap("pdb")).asJava)
d.setDataInfo(DATA_INFO) d.setDataInfo(DATA_INFO)
d.setId(OafMapperUtils.createOpenaireId(50, s"pdb_________::$pdb", true)) d.setId(IdentifierFactory.createOpenaireId(50, s"pdb_________::$pdb", true))
d.setOriginalId(List(pdb).asJava) d.setOriginalId(List(pdb).asJava)
val title = (json \ "title").extractOrElse[String](null) val title = (json \ "title").extractOrElse[String](null)
@ -442,7 +437,7 @@ object BioDBToOAF {
return List() return List()
d.setTitle( d.setTitle(
List( List(
OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER, DATA_INFO) OafMapperUtils.structuredProperty(title, ModelConstants.MAIN_TITLE_QUALIFIER)
).asJava ).asJava
) )
@ -467,7 +462,6 @@ object BioDBToOAF {
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
"Bioentity", "Bioentity",
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
@ -535,15 +529,14 @@ object BioDBToOAF {
List( List(
OafMapperUtils.structuredProperty( OafMapperUtils.structuredProperty(
input.title, input.title,
ModelConstants.MAIN_TITLE_QUALIFIER, ModelConstants.MAIN_TITLE_QUALIFIER
DATA_INFO
) )
).asJava ).asJava
) )
val nsPrefix = input.targetPidType.toLowerCase.padTo(12, '_') val nsPrefix = input.targetPidType.toLowerCase.padTo(12, '_')
d.setId(OafMapperUtils.createOpenaireId(50, s"$nsPrefix::${input.targetPid.toLowerCase}", true)) d.setId(IdentifierFactory.createOpenaireId(50, s"$nsPrefix::${input.targetPid.toLowerCase}", true))
d.setOriginalId(List(input.targetPid.toLowerCase).asJava) d.setOriginalId(List(input.targetPid.toLowerCase).asJava)
d.setPid( d.setPid(
@ -552,9 +545,7 @@ object BioDBToOAF {
input.targetPid.toLowerCase, input.targetPid.toLowerCase,
input.targetPidType.toLowerCase, input.targetPidType.toLowerCase,
"Protein Data Bank Identifier", "Protein Data Bank Identifier",
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
DATA_INFO
) )
).asJava ).asJava
) )
@ -567,19 +558,14 @@ object BioDBToOAF {
OafMapperUtils.qualifier( OafMapperUtils.qualifier(
"0046", "0046",
"Bioentity", "Bioentity",
ModelConstants.DNET_PUBLICATION_RESOURCE,
ModelConstants.DNET_PUBLICATION_RESOURCE ModelConstants.DNET_PUBLICATION_RESOURCE
) )
) )
i.setCollectedfrom(collectedFromMap("ebi")) i.setCollectedfrom(collectedFromMap("ebi"))
d.setInstance(List(i).asJava) d.setInstance(List(i).asJava)
i.setDateofacceptance( i.setDateofacceptance(GraphCleaningFunctions.cleanDate(input.date))
OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO) d.setDateofacceptance(GraphCleaningFunctions.cleanDate(input.date))
)
d.setDateofacceptance(
OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)
)
List( List(
d, d,
@ -588,8 +574,8 @@ object BioDBToOAF {
"pmid", "pmid",
d.getId, d.getId,
collectedFromMap("ebi"), collectedFromMap("ebi"),
ModelConstants.RELATIONSHIP, Relation.SUBRELTYPE.relationship,
ModelConstants.IS_RELATED_TO, Relation.RELCLASS.IsRelatedTo,
GraphCleaningFunctions.cleanDate(input.date) GraphCleaningFunctions.cleanDate(input.date)
) )
) )

View File

@ -25,15 +25,17 @@ object PubMedToOaf {
"doi" -> "https://dx.doi.org/" "doi" -> "https://dx.doi.org/"
) )
val dataInfo: DataInfo = OafMapperUtils.dataInfo( val ENTITY_DATAINFO: EntityDataInfo = OafMapperUtils.dataInfo(
false, false,
false,
0.9f,
null, null,
false, false,
false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER
ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER,
"0.9"
) )
val REL_DATAINFO = OafMapperUtils.fromEntityDataInfo(ENTITY_DATAINFO)
val collectedFrom: KeyValue = val collectedFrom: KeyValue =
OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central") OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")
@ -98,14 +100,12 @@ object PubMedToOaf {
return null return null
val journal = new Journal val journal = new Journal
journal.setDataInfo(dataInfo)
journal.setName(j.getTitle) journal.setName(j.getTitle)
journal.setConferencedate(j.getDate) journal.setConferencedate(j.getDate)
journal.setVol(j.getVolume) journal.setVol(j.getVolume)
journal.setIssnPrinted(j.getIssn) journal.setIssnPrinted(j.getIssn)
journal.setIss(j.getIssue) journal.setIss(j.getIssue)
journal journal
} }
/** Find vocabulary term into synonyms and term in the vocabulary /** Find vocabulary term into synonyms and term in the vocabulary
@ -143,9 +143,7 @@ object PubMedToOaf {
article.getPmid, article.getPmid,
PidType.pmid.toString, PidType.pmid.toString,
PidType.pmid.toString, PidType.pmid.toString,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
dataInfo
) )
if (StringUtils.isNotBlank(article.getPmcId)) { if (StringUtils.isNotBlank(article.getPmcId)) {
@ -153,9 +151,7 @@ object PubMedToOaf {
article.getPmcId, article.getPmcId,
PidType.pmc.toString, PidType.pmc.toString,
PidType.pmc.toString, PidType.pmc.toString,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
dataInfo
) )
} }
if (pidList == null) if (pidList == null)
@ -170,9 +166,7 @@ object PubMedToOaf {
normalizedPid, normalizedPid,
PidType.doi.toString, PidType.doi.toString,
PidType.doi.toString, PidType.doi.toString,
ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES
ModelConstants.DNET_PID_TYPES,
dataInfo
) )
} }
@ -200,7 +194,7 @@ object PubMedToOaf {
val result = createResult(pubmedInstance.getInstancetype, vocabularies) val result = createResult(pubmedInstance.getInstancetype, vocabularies)
if (result == null) if (result == null)
return result return result
result.setDataInfo(dataInfo) result.setDataInfo(ENTITY_DATAINFO)
pubmedInstance.setPid(pidList.asJava) pubmedInstance.setPid(pidList.asJava)
if (alternateIdentifier != null) if (alternateIdentifier != null)
pubmedInstance.setAlternateIdentifier(List(alternateIdentifier).asJava) pubmedInstance.setAlternateIdentifier(List(alternateIdentifier).asJava)
@ -218,9 +212,8 @@ object PubMedToOaf {
pubmedInstance.setUrl(urlLists.asJava) pubmedInstance.setUrl(urlLists.asJava)
//ASSIGN DateofAcceptance //ASSIGN DateofAcceptance
pubmedInstance.setDateofacceptance( pubmedInstance.setDateofacceptance(GraphCleaningFunctions.cleanDate(article.getDate))
OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)
)
//ASSIGN COLLECTEDFROM //ASSIGN COLLECTEDFROM
pubmedInstance.setCollectedfrom(collectedFrom) pubmedInstance.setCollectedfrom(collectedFrom)
result.setPid(pidList.asJava) result.setPid(pidList.asJava)
@ -238,9 +231,7 @@ object PubMedToOaf {
// RESULT MAPPING // RESULT MAPPING
//-------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------
result.setDateofacceptance( result.setDateofacceptance(GraphCleaningFunctions.cleanDate(article.getDate))
OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo)
)
if (article.getTitle == null || article.getTitle.isEmpty) if (article.getTitle == null || article.getTitle.isEmpty)
return null return null
@ -248,14 +239,13 @@ object PubMedToOaf {
List( List(
OafMapperUtils.structuredProperty( OafMapperUtils.structuredProperty(
article.getTitle, article.getTitle,
ModelConstants.MAIN_TITLE_QUALIFIER, ModelConstants.MAIN_TITLE_QUALIFIER
dataInfo
) )
).asJava ).asJava
) )
if (article.getDescription != null && article.getDescription.nonEmpty) if (article.getDescription != null && article.getDescription.nonEmpty)
result.setDescription(List(OafMapperUtils.field(article.getDescription, dataInfo)).asJava) result.setDescription(List(article.getDescription).asJava)
if (article.getLanguage != null) { if (article.getLanguage != null) {
@ -271,8 +261,7 @@ object PubMedToOaf {
SUBJ_CLASS, SUBJ_CLASS,
SUBJ_CLASS, SUBJ_CLASS,
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES,
ModelConstants.DNET_SUBJECT_TYPOLOGIES, REL_DATAINFO
dataInfo
) )
)(collection.breakOut) )(collection.breakOut)
if (subjects != null) if (subjects != null)

View File

@ -78,17 +78,12 @@ public class SparkAtomicActionScoreJobTest {
SparkAtomicActionScoreJob SparkAtomicActionScoreJob
.main( .main(
new String[] { new String[] {
"-isSparkSessionManaged", "-isSparkSessionManaged", Boolean.FALSE.toString(),
Boolean.FALSE.toString(), "-inputPath", bipScoresPath,
"-inputPath", "-outputPath", workingDir.toString() + "/actionSet"
bipScoresPath,
"-outputPath",
workingDir.toString() + "/actionSet"
}); });
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Result> tmp = sc JavaRDD<Result> tmp = sc
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)

View File

@ -94,57 +94,6 @@ public class PrepareTest {
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).count());
Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().size()); Assertions.assertEquals(1, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().size());
Assertions
.assertEquals(
3, tmp.filter(r -> r.getId().equals(doi1)).collect().get(0).getInstance().get(0).getMeasures().size());
Assertions
.assertEquals(
"6.34596412687e-09", tmp
.filter(r -> r.getId().equals(doi1))
.collect()
.get(0)
.getInstance()
.get(0)
.getMeasures()
.stream()
.filter(sl -> sl.getId().equals("influence"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"0.641151896994", tmp
.filter(r -> r.getId().equals(doi1))
.collect()
.get(0)
.getInstance()
.get(0)
.getMeasures()
.stream()
.filter(sl -> sl.getId().equals("popularity_alt"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"2.33375102921e-09", tmp
.filter(r -> r.getId().equals(doi1))
.collect()
.get(0)
.getInstance()
.get(0)
.getMeasures()
.stream()
.filter(sl -> sl.getId().equals("popularity"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
final String doi2 = "unresolved::10.3390/s18072310::doi"; final String doi2 = "unresolved::10.3390/s18072310::doi";

View File

@ -87,14 +87,8 @@ public class ProduceTest {
.forEach( .forEach(
sbj -> Assertions sbj -> Assertions
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid()));
sbjs
.forEach(
sbj -> Assertions
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename()));
sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); sbjs.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
sbjs.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible()));
sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); sbjs.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); sbjs.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
sbjs sbjs
@ -109,49 +103,6 @@ public class ProduceTest {
sbj -> Assertions sbj -> Assertions
.assertEquals( .assertEquals(
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
sbjs
.forEach(
sbj -> Assertions
.assertEquals(
ModelConstants.DNET_PROVENANCE_ACTIONS,
sbj.getDataInfo().getProvenanceaction().getSchemename()));
}
@Test
void produceTestMeasuress() throws Exception {
JavaRDD<Result> tmp = getResultJavaRDD();
List<KeyValue> mes = tmp
.filter(row -> row.getInstance() != null && row.getInstance().size() > 0)
.flatMap(row -> row.getInstance().iterator())
.flatMap(i -> i.getMeasures().iterator())
.flatMap(m -> m.getUnit().iterator())
.collect();
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
mes.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
mes.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible()));
mes.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
mes.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
mes
.forEach(
sbj -> Assertions.assertEquals("measure:bip", sbj.getDataInfo().getProvenanceaction().getClassid()));
mes
.forEach(
sbj -> Assertions
.assertEquals("Inferred by OpenAIRE", sbj.getDataInfo().getProvenanceaction().getClassname()));
mes
.forEach(
sbj -> Assertions
.assertEquals(
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
mes
.forEach(
sbj -> Assertions
.assertEquals(
ModelConstants.DNET_PROVENANCE_ACTIONS,
sbj.getDataInfo().getProvenanceaction().getSchemename()));
} }
@Test @Test
@ -191,107 +142,6 @@ public class ProduceTest {
} }
@Test
void produceTest3Measures() throws Exception {
final String doi = "unresolved::10.3390/s18072310::doi";
JavaRDD<Result> tmp = getResultJavaRDD();
tmp
.filter(row -> row.getId().equals(doi))
.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
Assertions
.assertEquals(
3, tmp
.filter(row -> row.getId().equals(doi))
.collect()
.get(0)
.getInstance()
.get(0)
.getMeasures()
.size());
List<Measure> measures = tmp
.filter(row -> row.getId().equals(doi))
.flatMap(row -> row.getInstance().iterator())
.flatMap(inst -> inst.getMeasures().iterator())
.collect();
Assertions
.assertEquals(
"7.5597134689e-09", measures
.stream()
.filter(mes -> mes.getId().equals("influence"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"4.903880192", measures
.stream()
.filter(mes -> mes.getId().equals("popularity_alt"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"1.17977512835e-08", measures
.stream()
.filter(mes -> mes.getId().equals("popularity"))
.collect(Collectors.toList())
.get(0)
.getUnit()
.get(0)
.getValue());
Assertions
.assertEquals(
"10.3390/s18072310",
tmp
.filter(row -> row.getId().equals(doi))
.collect()
.get(0)
.getInstance()
.get(0)
.getPid()
.get(0)
.getValue()
.toLowerCase());
Assertions
.assertEquals(
"doi",
tmp
.filter(row -> row.getId().equals(doi))
.collect()
.get(0)
.getInstance()
.get(0)
.getPid()
.get(0)
.getQualifier()
.getClassid());
Assertions
.assertEquals(
"Digital Object Identifier",
tmp
.filter(row -> row.getId().equals(doi))
.collect()
.get(0)
.getInstance()
.get(0)
.getPid()
.get(0)
.getQualifier()
.getClassname());
}
@Test @Test
void produceTestMeasures() throws Exception { void produceTestMeasures() throws Exception {
final String doi = "unresolved::10.3390/s18072310::doi"; final String doi = "unresolved::10.3390/s18072310::doi";
@ -454,7 +304,6 @@ public class ProduceTest {
SparkSaveUnresolved.main(new String[] { SparkSaveUnresolved.main(new String[] {
"--isSparkSessionManaged", Boolean.FALSE.toString(), "--isSparkSessionManaged", Boolean.FALSE.toString(),
"--sourcePath", workingDir.toString() + "/work", "--sourcePath", workingDir.toString() + "/work",
"-outputPath", workingDir.toString() + "/unresolved" "-outputPath", workingDir.toString() + "/unresolved"
}); });
@ -553,14 +402,8 @@ public class ProduceTest {
.forEach( .forEach(
sbj -> Assertions sbj -> Assertions
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid())); .assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemeid()));
sbjs_sdg
.forEach(
sbj -> Assertions
.assertEquals(ModelConstants.DNET_SUBJECT_TYPOLOGIES, sbj.getQualifier().getSchemename()));
sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getDeletedbyinference()));
sbjs_sdg.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred())); sbjs_sdg.forEach(sbj -> Assertions.assertEquals(true, sbj.getDataInfo().getInferred()));
sbjs_sdg.forEach(sbj -> Assertions.assertEquals(false, sbj.getDataInfo().getInvisible()));
sbjs_sdg.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust())); sbjs_sdg.forEach(sbj -> Assertions.assertEquals("", sbj.getDataInfo().getTrust()));
sbjs_sdg.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance())); sbjs_sdg.forEach(sbj -> Assertions.assertEquals("update", sbj.getDataInfo().getInferenceprovenance()));
sbjs_sdg sbjs_sdg
@ -575,12 +418,6 @@ public class ProduceTest {
sbj -> Assertions sbj -> Assertions
.assertEquals( .assertEquals(
ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid())); ModelConstants.DNET_PROVENANCE_ACTIONS, sbj.getDataInfo().getProvenanceaction().getSchemeid()));
sbjs_sdg
.forEach(
sbj -> Assertions
.assertEquals(
ModelConstants.DNET_PROVENANCE_ACTIONS,
sbj.getDataInfo().getProvenanceaction().getSchemename()));
} }
} }

View File

@ -27,8 +27,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
@ -99,7 +101,7 @@ public class CreateOpenCitationsASTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
assertEquals(62, tmp.count()); assertEquals(31, tmp.count());
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r))); // tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
@ -131,10 +133,7 @@ public class CreateOpenCitationsASTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
assertEquals(46, tmp.count()); assertEquals(23, tmp.count());
// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
} }
@Test @Test
@ -164,8 +163,8 @@ public class CreateOpenCitationsASTest {
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
tmp.foreach(r -> { tmp.foreach(r -> {
assertEquals(ModelConstants.OPENOCITATIONS_NAME, r.getCollectedfrom().get(0).getValue()); assertEquals(ModelConstants.OPENOCITATIONS_NAME, r.getProvenance().get(0).getCollectedfrom().getValue());
assertEquals(ModelConstants.OPENOCITATIONS_ID, r.getCollectedfrom().get(0).getKey()); assertEquals(ModelConstants.OPENOCITATIONS_ID, r.getProvenance().get(0).getCollectedfrom().getKey());
}); });
} }
@ -197,15 +196,14 @@ public class CreateOpenCitationsASTest {
.map(aa -> ((Relation) aa.getPayload())); .map(aa -> ((Relation) aa.getPayload()));
tmp.foreach(r -> { tmp.foreach(r -> {
assertEquals(false, r.getDataInfo().getInferred()); final DataInfo dataInfo = r.getProvenance().get(0).getDataInfo();
assertEquals(false, r.getDataInfo().getDeletedbyinference()); assertEquals(false, dataInfo.getInferred());
assertEquals("0.91", r.getDataInfo().getTrust()); assertEquals(0.91f, dataInfo.getTrust());
assertEquals( assertEquals(
CreateActionSetSparkJob.OPENCITATIONS_CLASSID, r.getDataInfo().getProvenanceaction().getClassid()); CreateActionSetSparkJob.OPENCITATIONS_CLASSID, dataInfo.getProvenanceaction().getClassid());
assertEquals( assertEquals(
CreateActionSetSparkJob.OPENCITATIONS_CLASSNAME, r.getDataInfo().getProvenanceaction().getClassname()); CreateActionSetSparkJob.OPENCITATIONS_CLASSNAME, dataInfo.getProvenanceaction().getClassname());
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemeid()); assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, dataInfo.getProvenanceaction().getSchemeid());
assertEquals(ModelConstants.DNET_PROVENANCE_ACTIONS, r.getDataInfo().getProvenanceaction().getSchemename());
}); });
} }
@ -240,9 +238,8 @@ public class CreateOpenCitationsASTest {
assertEquals("citation", r.getSubRelType()); assertEquals("citation", r.getSubRelType());
assertEquals("resultResult", r.getRelType()); assertEquals("resultResult", r.getRelType());
}); });
assertEquals(23, tmp.count());
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count()); assertEquals(23, tmp.filter(r -> r.getRelClass().equals("Cites")).count());
assertEquals(23, tmp.filter(r -> r.getRelClass().equals("IsCitedBy")).count());
} }
@Test @Test
@ -281,17 +278,17 @@ public class CreateOpenCitationsASTest {
@Test @Test
void testRelationsSourceTargetCouple() throws Exception { void testRelationsSourceTargetCouple() throws Exception {
final String doi1 = "50|doi_________::" final String doi1 = "50|doi_________::"
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x")); + ModelSupport.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-015-3684-x"));
final String doi2 = "50|doi_________::" final String doi2 = "50|doi_________::"
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x")); + ModelSupport.md5(CleaningFunctions.normalizePidValue("doi", "10.1111/j.1551-2916.2008.02408.x"));
final String doi3 = "50|doi_________::" final String doi3 = "50|doi_________::"
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9")); + ModelSupport.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-014-2114-9"));
final String doi4 = "50|doi_________::" final String doi4 = "50|doi_________::"
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069")); + ModelSupport.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/j.ceramint.2013.09.069"));
final String doi5 = "50|doi_________::" final String doi5 = "50|doi_________::"
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4")); + ModelSupport.md5(CleaningFunctions.normalizePidValue("doi", "10.1007/s10854-009-9913-4"));
final String doi6 = "50|doi_________::" final String doi6 = "50|doi_________::"
+ IdentifierFactory.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5")); + ModelSupport.md5(CleaningFunctions.normalizePidValue("doi", "10.1016/0038-1098(72)90370-5"));
String inputPath = getClass() String inputPath = getClass()
.getResource( .getResource(
@ -318,18 +315,18 @@ public class CreateOpenCitationsASTest {
JavaRDD<Relation> check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1)); JavaRDD<Relation> check = tmp.filter(r -> r.getSource().equals(doi1) || r.getTarget().equals(doi1));
assertEquals(10, check.count()); assertEquals(5, check.count());
check.foreach(r -> { check.foreach(r -> {
if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) || if (r.getSource().equals(doi2) || r.getSource().equals(doi3) || r.getSource().equals(doi4) ||
r.getSource().equals(doi5) || r.getSource().equals(doi6)) { r.getSource().equals(doi5) || r.getSource().equals(doi6)) {
assertEquals(ModelConstants.IS_CITED_BY, r.getRelClass()); assertEquals(Relation.RELCLASS.IsCitedBy, r.getRelClass());
assertEquals(doi1, r.getTarget()); assertEquals(doi1, r.getTarget());
} }
}); });
assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count()); assertEquals(5, check.filter(r -> r.getSource().equals(doi1)).count());
check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(ModelConstants.CITES, r.getRelClass())); check.filter(r -> r.getSource().equals(doi1)).foreach(r -> assertEquals(Relation.RELCLASS.Cites, r.getRelClass()));
} }
} }

View File

@ -50,7 +50,7 @@ class GenerateRorActionSetJobTest {
assertEquals("AU", o.getCountry().getClassid()); assertEquals("AU", o.getCountry().getClassid());
assertNotNull(o.getLegalname()); assertNotNull(o.getLegalname());
assertEquals("Mount Stromlo Observatory", o.getLegalname().getValue()); assertEquals("Mount Stromlo Observatory", o.getLegalname());
System.out.println(mapper.writeValueAsString(o)); System.out.println(mapper.writeValueAsString(o));
} }

View File

@ -8,6 +8,7 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.Entity;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -24,7 +25,6 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
public class SparkAtomicActionCountJobTest { public class SparkAtomicActionCountJobTest {
@ -74,49 +74,28 @@ public class SparkAtomicActionCountJobTest {
SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet"); SparkAtomicActionUsageJob.writeActionSet(spark, usageScoresPath, workingDir.toString() + "/actionSet");
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<AtomicAction> tmp = sc JavaRDD<AtomicAction> tmp = sc
.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class) .sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
.map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class)); .map(usm -> OBJECT_MAPPER.readValue(usm._2.getBytes(), AtomicAction.class));
// .map(aa -> (Result) aa.getPayload()); // .map(aa -> (Result) aa.getPayload());
Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("50|")).count()); Assertions.assertEquals(9, tmp.filter(aa -> ((Entity) aa.getPayload()).getId().startsWith("50|")).count());
Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("10|")).count()); Assertions.assertEquals(9, tmp.filter(aa -> ((Entity) aa.getPayload()).getId().startsWith("10|")).count());
Assertions.assertEquals(9, tmp.filter(aa -> ((OafEntity) aa.getPayload()).getId().startsWith("40|")).count()); Assertions.assertEquals(9, tmp.filter(aa -> ((Entity) aa.getPayload()).getId().startsWith("40|")).count());
tmp.foreach(r -> Assertions.assertEquals(2, ((OafEntity) r.getPayload()).getMeasures().size())); tmp.foreach(r -> Assertions.assertEquals(2, ((Entity) r.getPayload()).getMeasures().size()));
tmp tmp
.foreach( .foreach(
r -> ((OafEntity) r.getPayload()) r -> ((Entity) r.getPayload())
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getDeletedbyinference()))));
tmp
.foreach(
r -> ((OafEntity) r.getPayload())
.getMeasures() .getMeasures()
.stream() .stream()
.forEach( .forEach(
m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred())))); m -> m.getUnit().stream().forEach(u -> Assertions.assertTrue(u.getDataInfo().getInferred()))));
tmp tmp
.foreach( .foreach(
r -> ((OafEntity) r.getPayload()) r -> ((Entity) r.getPayload())
.getMeasures()
.stream()
.forEach(
m -> m
.getUnit()
.stream()
.forEach(u -> Assertions.assertFalse(u.getDataInfo().getInvisible()))));
tmp
.foreach(
r -> ((OafEntity) r.getPayload())
.getMeasures() .getMeasures()
.stream() .stream()
.forEach( .forEach(
@ -130,7 +109,7 @@ public class SparkAtomicActionCountJobTest {
u.getDataInfo().getProvenanceaction().getClassid())))); u.getDataInfo().getProvenanceaction().getClassid()))));
tmp tmp
.foreach( .foreach(
r -> ((OafEntity) r.getPayload()) r -> ((Entity) r.getPayload())
.getMeasures() .getMeasures()
.stream() .stream()
.forEach( .forEach(
@ -145,7 +124,7 @@ public class SparkAtomicActionCountJobTest {
tmp tmp
.foreach( .foreach(
r -> ((OafEntity) r.getPayload()) r -> ((Entity) r.getPayload())
.getMeasures() .getMeasures()
.stream() .stream()
.forEach( .forEach(
@ -163,7 +142,7 @@ public class SparkAtomicActionCountJobTest {
1, 1,
tmp tmp
.filter( .filter(
r -> ((OafEntity) r.getPayload()) r -> ((Entity) r.getPayload())
.getId() .getId()
.equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) .equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.count()); .count());
@ -172,7 +151,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"0", "0",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) .filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.collect() .collect()
.get(0) .get(0)
@ -188,7 +167,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"5", "5",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6")) .filter(r -> r.getId().equals("50|dedup_wf_001::53575dc69e9ace947e02d47ecd54a7a6"))
.collect() .collect()
.get(0) .get(0)
@ -205,7 +184,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"0", "0",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect() .collect()
.get(0) .get(0)
@ -221,7 +200,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"1", "1",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0")) .filter(r -> r.getId().equals("50|doi_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect() .collect()
.get(0) .get(0)
@ -238,7 +217,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"2", "2",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect() .collect()
.get(0) .get(0)
@ -254,7 +233,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"6", "6",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f")) .filter(r -> r.getId().equals("50|doi_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect() .collect()
.get(0) .get(0)
@ -271,7 +250,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"0", "0",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6")) .filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6"))
.collect() .collect()
.get(0) .get(0)
@ -287,7 +266,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"5", "5",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6")) .filter(r -> r.getId().equals("40|f1__________::53575dc69e9ace947e02d47ecd54a7a6"))
.collect() .collect()
.get(0) .get(0)
@ -304,7 +283,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"0", "0",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0")) .filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect() .collect()
.get(0) .get(0)
@ -320,7 +299,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"1", "1",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0")) .filter(r -> r.getId().equals("40|f11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect() .collect()
.get(0) .get(0)
@ -337,7 +316,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"2", "2",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f")) .filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect() .collect()
.get(0) .get(0)
@ -353,7 +332,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"6", "6",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f")) .filter(r -> r.getId().equals("40|f12_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect() .collect()
.get(0) .get(0)
@ -370,7 +349,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"0", "0",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6")) .filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6"))
.collect() .collect()
.get(0) .get(0)
@ -386,7 +365,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"5", "5",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6")) .filter(r -> r.getId().equals("10|d1__________::53575dc69e9ace947e02d47ecd54a7a6"))
.collect() .collect()
.get(0) .get(0)
@ -403,7 +382,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"0", "0",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0")) .filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect() .collect()
.get(0) .get(0)
@ -419,7 +398,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"1", "1",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0")) .filter(r -> r.getId().equals("10|d11_________::17eda2ff77407538fbe5d3d719b9d1c0"))
.collect() .collect()
.get(0) .get(0)
@ -436,7 +415,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"2", "2",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f")) .filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect() .collect()
.get(0) .get(0)
@ -452,7 +431,7 @@ public class SparkAtomicActionCountJobTest {
.assertEquals( .assertEquals(
"6", "6",
tmp tmp
.map(r -> ((OafEntity) r.getPayload())) .map(r -> ((Entity) r.getPayload()))
.filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f")) .filter(r -> r.getId().equals("10|d12_________::3085e4c6e051378ca6157fe7f0430c1f"))
.collect() .collect()
.get(0) .get(0)

View File

@ -3,7 +3,7 @@
<parent> <parent>
<artifactId>dhp-workflows</artifactId> <artifactId>dhp-workflows</artifactId>
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<version>1.2.5-SNAPSHOT</version> <version>2.0.0-SNAPSHOT</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -13,6 +13,8 @@ import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.function.Function; import java.util.function.Function;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationLabel;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -25,8 +27,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.DbClient; import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
public class ReadBlacklistFromDB implements Closeable { public class ReadBlacklistFromDB implements Closeable {
@ -76,30 +76,22 @@ public class ReadBlacklistFromDB implements Closeable {
public List<Relation> processBlacklistEntry(ResultSet rs) { public List<Relation> processBlacklistEntry(ResultSet rs) {
try { try {
Relation direct = new Relation(); Relation direct = new Relation();
Relation inverse = new Relation();
String source_prefix = ModelSupport.entityIdPrefix.get(rs.getString("source_type")); String source_prefix = ModelSupport.entityIdPrefix.get(rs.getString("source_type"));
String target_prefix = ModelSupport.entityIdPrefix.get(rs.getString("target_type")); String target_prefix = ModelSupport.entityIdPrefix.get(rs.getString("target_type"));
String source_direct = source_prefix + "|" + rs.getString("source"); String source_direct = source_prefix + "|" + rs.getString("source");
direct.setSource(source_direct); direct.setSource(source_direct);
inverse.setTarget(source_direct);
String target_direct = target_prefix + "|" + rs.getString("target"); String target_direct = target_prefix + "|" + rs.getString("target");
direct.setTarget(target_direct); direct.setTarget(target_direct);
inverse.setSource(target_direct);
String encoding = rs.getString("relationship"); String encoding = rs.getString("relationship");
RelationInverse ri = ModelSupport.findInverse(encoding); final RelationLabel directLabel = ModelSupport.unRel(encoding);
direct.setRelClass(ri.getRelClass()); direct.setRelClass(directLabel.getRelClass());
inverse.setRelClass(ri.getInverseRelClass()); direct.setRelType(directLabel.getRelType());
direct.setRelType(ri.getRelType()); direct.setSubRelType(directLabel.getSubReltype());
inverse.setRelType(ri.getRelType()); return Arrays.asList(direct, direct.inverse());
direct.setSubRelType(ri.getSubReltype());
inverse.setSubRelType(ri.getSubReltype());
return Arrays.asList(direct, inverse);
} catch (final SQLException e) { } catch (final SQLException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }

View File

@ -4,12 +4,11 @@ package eu.dnetlib.dhp.blacklist;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import eu.dnetlib.dhp.schema.oaf.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.common.RelationLabel;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.common.RelationInverse;
public class BlacklistRelationTest { public class BlacklistRelationTest {
@Test @Test
@ -26,7 +25,9 @@ public class BlacklistRelationTest {
"resultProject_outcome_isProducedBy"); "resultProject_outcome_isProducedBy");
rels.forEach(r -> { rels.forEach(r -> {
RelationInverse inverse = ModelSupport.relationInverseMap.get(r); RelationLabel inverse =
ModelSupport.unRel(r);
Assertions.assertNotNull(inverse); Assertions.assertNotNull(inverse);
Assertions.assertNotNull(inverse.getRelType()); Assertions.assertNotNull(inverse.getRelType());
Assertions.assertNotNull(inverse.getSubReltype()); Assertions.assertNotNull(inverse.getSubReltype());

Some files were not shown because too many files have changed in this diff Show More