1
0
Fork 0

restored changes from master branch

This commit is contained in:
Claudio Atzori 2020-04-20 18:53:06 +02:00
parent eb8a020859
commit d772d967aa
18 changed files with 369 additions and 230 deletions

View File

@ -1,14 +1,25 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
import java.util.List;
import java.util.Objects; import java.util.Objects;
public abstract class Oaf implements Serializable { public abstract class Oaf implements Serializable {
protected List<KeyValue> collectedfrom;
private DataInfo dataInfo; private DataInfo dataInfo;
private Long lastupdatetimestamp; private Long lastupdatetimestamp;
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public DataInfo getDataInfo() { public DataInfo getDataInfo() {
return dataInfo; return dataInfo;
} }

View File

@ -10,8 +10,6 @@ public abstract class OafEntity extends Oaf implements Serializable {
private List<String> originalId; private List<String> originalId;
private List<KeyValue> collectedfrom;
private List<StructuredProperty> pid; private List<StructuredProperty> pid;
private String dateofcollection; private String dateofcollection;
@ -38,14 +36,6 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.originalId = originalId; this.originalId = originalId;
} }
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public List<StructuredProperty> getPid() { public List<StructuredProperty> getPid() {
return pid; return pid;
} }

View File

@ -18,8 +18,6 @@ public class Relation extends Oaf {
private String target; private String target;
private List<KeyValue> collectedFrom = new ArrayList<>();
public String getRelType() { public String getRelType() {
return relType; return relType;
} }
@ -60,14 +58,6 @@ public class Relation extends Oaf {
this.target = target; this.target = target;
} }
public List<KeyValue> getCollectedFrom() {
return collectedFrom;
}
public void setCollectedFrom(final List<KeyValue> collectedFrom) {
this.collectedFrom = collectedFrom;
}
public void mergeFrom(final Relation r) { public void mergeFrom(final Relation r) {
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal"); checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
@ -77,12 +67,12 @@ public class Relation extends Oaf {
Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal"); Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal"); checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");
setCollectedFrom( setCollectedfrom(
Stream.concat( Stream.concat(
Optional.ofNullable(getCollectedFrom()) Optional.ofNullable(getCollectedfrom())
.map(Collection::stream) .map(Collection::stream)
.orElse(Stream.empty()), .orElse(Stream.empty()),
Optional.ofNullable(r.getCollectedFrom()) Optional.ofNullable(r.getCollectedfrom())
.map(Collection::stream) .map(Collection::stream)
.orElse(Stream.empty())) .orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals .distinct() // relies on KeyValue.equals
@ -103,6 +93,6 @@ public class Relation extends Oaf {
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom); return Objects.hash(relType, subRelType, relClass, source, target, collectedfrom);
} }
} }

View File

@ -46,7 +46,7 @@ public class ProtoConverter implements Serializable {
rel.setRelType(r.getRelType().toString()); rel.setRelType(r.getRelType().toString());
rel.setSubRelType(r.getSubRelType().toString()); rel.setSubRelType(r.getSubRelType().toString());
rel.setRelClass(r.getRelClass()); rel.setRelClass(r.getRelClass());
rel.setCollectedFrom( rel.setCollectedfrom(
r.getCollectedfromCount() > 0 r.getCollectedfromCount() > 0
? r.getCollectedfromList().stream() ? r.getCollectedfromList().stream()
.map(kv -> mapKV(kv)) .map(kv -> mapKV(kv))

View File

@ -1,10 +1,10 @@
{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"}

View File

@ -1,9 +1,38 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.journal;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.keyValue;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Author;
import java.util.*; import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentFactory; import org.dom4j.DocumentFactory;
@ -29,6 +58,12 @@ public abstract class AbstractMdRecordToOafMapper {
qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies"); qualifier("software", "software", "dnet:result_typologies", "dnet:result_typologies");
protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER = protected static final Qualifier OTHER_RESULTTYPE_QUALIFIER =
qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies"); qualifier("other", "other", "dnet:result_typologies", "dnet:result_typologies");
protected static final Qualifier REPOSITORY_QUALIFIER =
qualifier(
"sysimport:crosswalk:repository",
"sysimport:crosswalk:repository",
"dnet:provenanceActions",
"dnet:provenanceActions");
protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) { protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
this.code2name = code2name; this.code2name = code2name;
@ -55,13 +90,13 @@ public abstract class AbstractMdRecordToOafMapper {
final String type = doc.valueOf("//dr:CobjCategory/@type"); final String type = doc.valueOf("//dr:CobjCategory/@type");
final KeyValue collectedFrom = final KeyValue collectedFrom =
keyValue( keyValue(
doc.valueOf("//oaf:collectedFrom/@id"), createOpenaireId(10, doc.valueOf("//oaf:collectedFrom/@id"), true),
doc.valueOf("//oaf:collectedFrom/@name")); doc.valueOf("//oaf:collectedFrom/@name"));
final KeyValue hostedBy = final KeyValue hostedBy =
StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id"))
? collectedFrom ? collectedFrom
: keyValue( : keyValue(
doc.valueOf("//oaf:hostedBy/@id"), createOpenaireId(10, doc.valueOf("//oaf:hostedBy/@id"), true),
doc.valueOf("//oaf:hostedBy/@name")); doc.valueOf("//oaf:hostedBy/@name"));
final DataInfo info = prepareDataInfo(doc); final DataInfo info = prepareDataInfo(doc);
@ -154,7 +189,7 @@ public abstract class AbstractMdRecordToOafMapper {
r1.setRelClass("isProducedBy"); r1.setRelClass("isProducedBy");
r1.setSource(docId); r1.setSource(docId);
r1.setTarget(projectId); r1.setTarget(projectId);
r1.setCollectedFrom(Arrays.asList(collectedFrom)); r1.setCollectedfrom(Arrays.asList(collectedFrom));
r1.setDataInfo(info); r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp); r1.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r1); res.add(r1);
@ -165,7 +200,7 @@ public abstract class AbstractMdRecordToOafMapper {
r2.setRelClass("produces"); r2.setRelClass("produces");
r2.setSource(projectId); r2.setSource(projectId);
r2.setTarget(docId); r2.setTarget(docId);
r2.setCollectedFrom(Arrays.asList(collectedFrom)); r2.setCollectedfrom(Arrays.asList(collectedFrom));
r2.setDataInfo(info); r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp); r2.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r2); res.add(r2);
@ -398,7 +433,7 @@ public abstract class AbstractMdRecordToOafMapper {
final Node n = doc.selectSingleNode("//oaf:datainfo"); final Node n = doc.selectSingleNode("//oaf:datainfo");
if (n == null) { if (n == null) {
return null; return dataInfo(false, null, false, false, REPOSITORY_QUALIFIER, "0.9");
} }
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid"); final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");

View File

@ -1,11 +1,35 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.*; import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.asString;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.dataInfo;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.journal;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication; import eu.dnetlib.dhp.oa.graph.raw.common.AbstractMigrationApplication;
import eu.dnetlib.dhp.oa.graph.raw.common.DbClient; import eu.dnetlib.dhp.oa.graph.raw.common.DbClient;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.sql.Array; import java.sql.Array;
@ -119,7 +143,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
ds.setOriginalId(Arrays.asList(rs.getString("datasourceid"))); ds.setOriginalId(Arrays.asList(rs.getString("datasourceid")));
ds.setCollectedfrom( ds.setCollectedfrom(
listKeyValues( listKeyValues(
rs.getString("collectedfromid"), rs.getString("collectedfromname"))); createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
ds.setPid(new ArrayList<>()); ds.setPid(new ArrayList<>());
ds.setDateofcollection(asString(rs.getDate("dateofcollection"))); ds.setDateofcollection(asString(rs.getDate("dateofcollection")));
ds.setDateoftransformation(null); // Value not returned by the SQL query ds.setDateoftransformation(null); // Value not returned by the SQL query
@ -185,7 +210,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
p.setOriginalId(Arrays.asList(rs.getString("projectid"))); p.setOriginalId(Arrays.asList(rs.getString("projectid")));
p.setCollectedfrom( p.setCollectedfrom(
listKeyValues( listKeyValues(
rs.getString("collectedfromid"), rs.getString("collectedfromname"))); createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
p.setPid(new ArrayList<>()); p.setPid(new ArrayList<>());
p.setDateofcollection(asString(rs.getDate("dateofcollection"))); p.setDateofcollection(asString(rs.getDate("dateofcollection")));
p.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); p.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
@ -240,7 +266,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
o.setOriginalId(Arrays.asList(rs.getString("organizationid"))); o.setOriginalId(Arrays.asList(rs.getString("organizationid")));
o.setCollectedfrom( o.setCollectedfrom(
listKeyValues( listKeyValues(
rs.getString("collectedfromid"), rs.getString("collectedfromname"))); createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname")));
o.setPid(new ArrayList<>()); o.setPid(new ArrayList<>());
o.setDateofcollection(asString(rs.getDate("dateofcollection"))); o.setDateofcollection(asString(rs.getDate("dateofcollection")));
o.setDateoftransformation(asString(rs.getDate("dateoftransformation"))); o.setDateoftransformation(asString(rs.getDate("dateoftransformation")));
@ -285,7 +312,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final String dsId = createOpenaireId(10, rs.getString("datasource"), true); final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
final List<KeyValue> collectedFrom = final List<KeyValue> collectedFrom =
listKeyValues( listKeyValues(
rs.getString("collectedfromid"), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"));
final Relation r1 = new Relation(); final Relation r1 = new Relation();
r1.setRelType("datasourceOrganization"); r1.setRelType("datasourceOrganization");
@ -293,7 +321,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
r1.setRelClass("isProvidedBy"); r1.setRelClass("isProvidedBy");
r1.setSource(dsId); r1.setSource(dsId);
r1.setTarget(orgId); r1.setTarget(orgId);
r1.setCollectedFrom(collectedFrom); r1.setCollectedfrom(collectedFrom);
r1.setDataInfo(info); r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp); r1.setLastupdatetimestamp(lastUpdateTimestamp);
@ -303,7 +331,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
r2.setRelClass("provides"); r2.setRelClass("provides");
r2.setSource(orgId); r2.setSource(orgId);
r2.setTarget(dsId); r2.setTarget(dsId);
r2.setCollectedFrom(collectedFrom); r2.setCollectedfrom(collectedFrom);
r2.setDataInfo(info); r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp); r2.setLastupdatetimestamp(lastUpdateTimestamp);
@ -320,7 +348,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final String projectId = createOpenaireId(40, rs.getString("project"), true); final String projectId = createOpenaireId(40, rs.getString("project"), true);
final List<KeyValue> collectedFrom = final List<KeyValue> collectedFrom =
listKeyValues( listKeyValues(
rs.getString("collectedfromid"), rs.getString("collectedfromname")); createOpenaireId(10, rs.getString("collectedfromid"), true),
rs.getString("collectedfromname"));
final Relation r1 = new Relation(); final Relation r1 = new Relation();
r1.setRelType("projectOrganization"); r1.setRelType("projectOrganization");
@ -328,7 +357,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
r1.setRelClass("isParticipant"); r1.setRelClass("isParticipant");
r1.setSource(projectId); r1.setSource(projectId);
r1.setTarget(orgId); r1.setTarget(orgId);
r1.setCollectedFrom(collectedFrom); r1.setCollectedfrom(collectedFrom);
r1.setDataInfo(info); r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp); r1.setLastupdatetimestamp(lastUpdateTimestamp);
@ -338,7 +367,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
r2.setRelClass("hasParticipant"); r2.setRelClass("hasParticipant");
r2.setSource(orgId); r2.setSource(orgId);
r2.setTarget(projectId); r2.setTarget(projectId);
r2.setCollectedFrom(collectedFrom); r2.setCollectedfrom(collectedFrom);
r2.setDataInfo(info); r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp); r2.setLastupdatetimestamp(lastUpdateTimestamp);
@ -363,6 +392,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
"dnet:provenanceActions"), "dnet:provenanceActions"),
"0.9"); "0.9");
final List<KeyValue> collectedFrom =
listKeyValues(createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try { try {
if (rs.getString("source_type").equals("context")) { if (rs.getString("source_type").equals("context")) {
@ -381,6 +413,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), info)); r.setContext(prepareContext(rs.getString("source_id"), info));
r.setDataInfo(info); r.setDataInfo(info);
r.setCollectedfrom(collectedFrom);
return Arrays.asList(r); return Arrays.asList(r);
} else { } else {
@ -395,18 +428,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication
final Relation r2 = new Relation(); final Relation r2 = new Relation();
if (rs.getString("source_type").equals("project")) { if (rs.getString("source_type").equals("project")) {
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultProject"); r1.setRelType("resultProject");
r1.setSubRelType("outcome"); r1.setSubRelType("outcome");
r1.setRelClass("produces"); r1.setRelClass("produces");
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultProject"); r2.setRelType("resultProject");
r2.setSubRelType("outcome"); r2.setSubRelType("outcome");
r2.setRelClass("isProducedBy"); r2.setRelClass("isProducedBy");
} else { } else {
r1.setCollectedfrom(collectedFrom);
r1.setRelType("resultResult"); r1.setRelType("resultResult");
r1.setSubRelType("relationship"); r1.setSubRelType("relationship");
r1.setRelClass("isRelatedTo"); r1.setRelClass("isRelatedTo");
r2.setCollectedfrom(collectedFrom);
r2.setRelType("resultResult"); r2.setRelType("resultResult");
r2.setSubRelType("relationship"); r2.setSubRelType("relationship");
r2.setRelClass("isRelatedTo"); r2.setRelClass("isRelatedTo");

View File

@ -232,7 +232,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
r1.setRelClass("isRelatedTo"); r1.setRelClass("isRelatedTo");
r1.setSource(docId); r1.setSource(docId);
r1.setTarget(otherId); r1.setTarget(otherId);
r1.setCollectedFrom(Arrays.asList(collectedFrom)); r1.setCollectedfrom(Arrays.asList(collectedFrom));
r1.setDataInfo(info); r1.setDataInfo(info);
r1.setLastupdatetimestamp(lastUpdateTimestamp); r1.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r1); res.add(r1);
@ -243,7 +243,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
r2.setRelClass("isRelatedTo"); r2.setRelClass("isRelatedTo");
r2.setSource(otherId); r2.setSource(otherId);
r2.setTarget(docId); r2.setTarget(docId);
r2.setCollectedFrom(Arrays.asList(collectedFrom)); r2.setCollectedfrom(Arrays.asList(collectedFrom));
r2.setDataInfo(info); r2.setDataInfo(info);
r2.setLastupdatetimestamp(lastUpdateTimestamp); r2.setLastupdatetimestamp(lastUpdateTimestamp);
res.add(r2); res.add(r2);

View File

@ -334,7 +334,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
r.setRelClass(relClass); r.setRelClass(relClass);
r.setSource(source); r.setSource(source);
r.setTarget(target); r.setTarget(target);
r.setCollectedFrom(Arrays.asList(collectedFrom)); r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setDataInfo(info); r.setDataInfo(info);
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
return r; return r;

View File

@ -186,7 +186,7 @@ public abstract class AbstractScholexplorerParser {
r.setTarget(targetId); r.setTarget(targetId);
r.setRelType(relationSemantic); r.setRelType(relationSemantic);
r.setRelClass("datacite"); r.setRelClass("datacite");
r.setCollectedFrom(parsedObject.getCollectedfrom()); r.setCollectedfrom(parsedObject.getCollectedfrom());
r.setDataInfo(di); r.setDataInfo(di);
rels.add(r); rels.add(r);
r = new DLIRelation(); r = new DLIRelation();
@ -195,7 +195,7 @@ public abstract class AbstractScholexplorerParser {
r.setTarget(parsedObject.getId()); r.setTarget(parsedObject.getId());
r.setRelType(inverseRelation); r.setRelType(inverseRelation);
r.setRelClass("datacite"); r.setRelClass("datacite");
r.setCollectedFrom(parsedObject.getCollectedfrom()); r.setCollectedfrom(parsedObject.getCollectedfrom());
r.setDateOfCollection(dateOfCollection); r.setDateOfCollection(dateOfCollection);
rels.add(r); rels.add(r);
if ("unknown".equalsIgnoreCase(relatedType)) if ("unknown".equalsIgnoreCase(relatedType))

View File

@ -1,11 +1,16 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -43,6 +48,7 @@ public class MappersTest {
final Relation r2 = (Relation) list.get(2); final Relation r2 = (Relation) list.get(2);
assertValidId(p.getId()); assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
assertTrue(p.getAuthor().size() > 0); assertTrue(p.getAuthor().size() > 0);
assertTrue(p.getSubject().size() > 0); assertTrue(p.getSubject().size() > 0);
@ -50,13 +56,24 @@ public class MappersTest {
assertTrue(StringUtils.isNotBlank(p.getJournal().getName())); assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));
assertValidId(r1.getSource()); assertValidId(r1.getSource());
assertValidId(r1.getTarget());
assertValidId(r2.getSource()); assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget()); assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass())); assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
assertTrue(StringUtils.isNotBlank(r2.getRelClass())); assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
assertTrue(StringUtils.isNotBlank(r1.getRelType())); assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(StringUtils.isNotBlank(r2.getRelType()));
// System.out.println(new ObjectMapper().writeValueAsString(r1));
// System.out.println(new ObjectMapper().writeValueAsString(r2));
} }
@Test @Test
@ -65,15 +82,35 @@ public class MappersTest {
final List<Oaf> list = new OdfToOafMapper(code2name).processMdRecord(xml); final List<Oaf> list = new OdfToOafMapper(code2name).processMdRecord(xml);
assertEquals(1, list.size()); assertEquals(3, list.size());
assertTrue(list.get(0) instanceof Dataset); assertTrue(list.get(0) instanceof Dataset);
assertTrue(list.get(1) instanceof Relation);
assertTrue(list.get(2) instanceof Relation);
final Dataset d = (Dataset) list.get(0); final Dataset d = (Dataset) list.get(0);
final Relation r1 = (Relation) list.get(1);
final Relation r2 = (Relation) list.get(2);
assertValidId(d.getId()); assertValidId(d.getId());
assertValidId(d.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
assertTrue(d.getAuthor().size() > 0); assertTrue(d.getAuthor().size() > 0);
assertTrue(d.getSubject().size() > 0); assertTrue(d.getSubject().size() > 0);
assertValidId(r1.getSource());
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
} }
@Test @Test
@ -88,6 +125,7 @@ public class MappersTest {
final Software s = (Software) list.get(0); final Software s = (Software) list.get(0);
assertValidId(s.getId()); assertValidId(s.getId());
assertValidId(s.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(s.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(s.getTitle().get(0).getValue()));
assertTrue(s.getAuthor().size() > 0); assertTrue(s.getAuthor().size() > 0);
assertTrue(s.getSubject().size() > 0); assertTrue(s.getSubject().size() > 0);

View File

@ -1,10 +1,17 @@
package eu.dnetlib.dhp.oa.graph.raw; package eu.dnetlib.dhp.oa.graph.raw;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import java.io.IOException; import java.io.IOException;
import java.sql.Array; import java.sql.Array;
import java.sql.Date; import java.sql.Date;
@ -13,6 +20,7 @@ import java.sql.SQLException;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
@ -42,14 +50,13 @@ public class MigrateDbEntitiesApplicationTest {
final Datasource ds = (Datasource) list.get(0); final Datasource ds = (Datasource) list.get(0);
assertValidId(ds.getId()); assertValidId(ds.getId());
assertValidId(ds.getCollectedfrom().get(0).getKey());
assertEquals(ds.getOfficialname().getValue(), getValueAsString("officialname", fields)); assertEquals(ds.getOfficialname().getValue(), getValueAsString("officialname", fields));
assertEquals(ds.getEnglishname().getValue(), getValueAsString("englishname", fields)); assertEquals(ds.getEnglishname().getValue(), getValueAsString("englishname", fields));
assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields)); assertEquals(ds.getContactemail().getValue(), getValueAsString("contactemail", fields));
assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(ds.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
assertEquals( assertEquals(
ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields)); ds.getNamespaceprefix().getValue(), getValueAsString("namespaceprefix", fields));
assertEquals(
ds.getCollectedfrom().get(0).getKey(), getValueAsString("collectedfromid", fields));
assertEquals( assertEquals(
ds.getCollectedfrom().get(0).getValue(), ds.getCollectedfrom().get(0).getValue(),
getValueAsString("collectedfromname", fields)); getValueAsString("collectedfromname", fields));
@ -65,10 +72,9 @@ public class MigrateDbEntitiesApplicationTest {
final Project p = (Project) list.get(0); final Project p = (Project) list.get(0);
assertValidId(p.getId()); assertValidId(p.getId());
assertValidId(p.getCollectedfrom().get(0).getKey());
assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields)); assertEquals(p.getAcronym().getValue(), getValueAsString("acronym", fields));
assertEquals(p.getTitle().getValue(), getValueAsString("title", fields)); assertEquals(p.getTitle().getValue(), getValueAsString("title", fields));
assertEquals(
p.getCollectedfrom().get(0).getKey(), getValueAsString("collectedfromid", fields));
assertEquals( assertEquals(
p.getCollectedfrom().get(0).getValue(), p.getCollectedfrom().get(0).getValue(),
getValueAsString("collectedfromname", fields)); getValueAsString("collectedfromname", fields));
@ -86,6 +92,7 @@ public class MigrateDbEntitiesApplicationTest {
final Organization o = (Organization) list.get(0); final Organization o = (Organization) list.get(0);
assertValidId(o.getId()); assertValidId(o.getId());
assertValidId(o.getCollectedfrom().get(0).getKey());
assertEquals(o.getLegalshortname().getValue(), getValueAsString("legalshortname", fields)); assertEquals(o.getLegalshortname().getValue(), getValueAsString("legalshortname", fields));
assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields)); assertEquals(o.getLegalname().getValue(), getValueAsString("legalname", fields));
assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields)); assertEquals(o.getWebsiteurl().getValue(), getValueAsString("websiteurl", fields));
@ -98,8 +105,6 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals( assertEquals(
o.getCountry().getSchemename(), o.getCountry().getSchemename(),
getValueAsString("country", fields).split("@@@")[3]); getValueAsString("country", fields).split("@@@")[3]);
assertEquals(
o.getCollectedfrom().get(0).getKey(), getValueAsString("collectedfromid", fields));
assertEquals( assertEquals(
o.getCollectedfrom().get(0).getValue(), o.getCollectedfrom().get(0).getValue(),
getValueAsString("collectedfromname", fields)); getValueAsString("collectedfromname", fields));
@ -137,6 +142,8 @@ public class MigrateDbEntitiesApplicationTest {
assertValidId(r2.getSource()); assertValidId(r2.getSource());
assertEquals(r1.getSource(), r2.getTarget()); assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget()); assertEquals(r2.getSource(), r1.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
} }
@Test @Test
@ -146,7 +153,12 @@ public class MigrateDbEntitiesApplicationTest {
final List<Oaf> list = app.processClaims(rs); final List<Oaf> list = app.processClaims(rs);
assertEquals(1, list.size()); assertEquals(1, list.size());
assertTrue(list.get(0) instanceof Result);
final Result r = (Result) list.get(0);
verifyMocks(fields); verifyMocks(fields);
assertValidId(r.getCollectedfrom().get(0).getKey());
} }
@Test @Test
@ -157,6 +169,33 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals(2, list.size()); assertEquals(2, list.size());
verifyMocks(fields); verifyMocks(fields);
assertTrue(list.get(0) instanceof Relation);
assertTrue(list.get(1) instanceof Relation);
final Relation r1 = (Relation) list.get(0);
final Relation r2 = (Relation) list.get(1);
assertValidId(r1.getSource());
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
// System.out.println(new ObjectMapper().writeValueAsString(r1));
// System.out.println(new ObjectMapper().writeValueAsString(r2));
} }
private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException { private List<TypedField> prepareMocks(final String jsonFile) throws IOException, SQLException {

View File

@ -87,6 +87,7 @@
<oaf:language>und</oaf:language> <oaf:language>und</oaf:language>
<oaf:concept id="https://zenodo.org/communities/epfl"/> <oaf:concept id="https://zenodo.org/communities/epfl"/>
<oaf:hostedBy id="re3data_____::r3d100010468" name="Zenodo"/> <oaf:hostedBy id="re3data_____::r3d100010468" name="Zenodo"/>
<oaf:projectid>corda_______::226852</oaf:projectid>
<oaf:collectedFrom id="re3data_____::r3d100010468" name="Zenodo"/> <oaf:collectedFrom id="re3data_____::r3d100010468" name="Zenodo"/>
</metadata> </metadata>
<about xmlns:dc="http://purl.org/dc/elements/1.1/" <about xmlns:dc="http://purl.org/dc/elements/1.1/"

View File

@ -48,7 +48,7 @@ public class Scholix implements Serializable {
if (scholixSummary.getDate() != null && scholixSummary.getDate().size() > 0) if (scholixSummary.getDate() != null && scholixSummary.getDate().size() > 0)
s.setPublicationDate(scholixSummary.getDate().get(0)); s.setPublicationDate(scholixSummary.getDate().get(0));
s.setLinkprovider( s.setLinkprovider(
rel.getCollectedFrom().stream() rel.getCollectedfrom().stream()
.map( .map(
cf -> cf ->
new ScholixEntityId( new ScholixEntityId(
@ -73,7 +73,7 @@ public class Scholix implements Serializable {
if (scholixSummary.getDate() != null && scholixSummary.getDate().size() > 0) if (scholixSummary.getDate() != null && scholixSummary.getDate().size() > 0)
s.setPublicationDate(scholixSummary.getDate().get(0)); s.setPublicationDate(scholixSummary.getDate().get(0));
s.setLinkprovider( s.setLinkprovider(
rel.getCollectedFrom().stream() rel.getCollectedfrom().stream()
.map( .map(
cf -> cf ->
new ScholixEntityId( new ScholixEntityId(

View File

@ -1 +1 @@
{"dataInfo":{"invisible":false,"inferred":null,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":null},"lastupdatetimestamp":null,"relType":"IsReferencedBy","subRelType":null,"relClass":"datacite","source":"50|dedup_______::4f00e4f0e82bb4cbb35261478e55568e","target":"60|97519e00ee2cddfa1f5bcb5220429b8f","collectedFrom":[{"key":"dli_________::europe_pmc__","value":"Europe PMC","dataInfo":null}]} {"dataInfo":{"invisible":false,"inferred":null,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":null},"lastupdatetimestamp":null,"relType":"IsReferencedBy","subRelType":null,"relClass":"datacite","source":"50|dedup_______::4f00e4f0e82bb4cbb35261478e55568e","target":"60|97519e00ee2cddfa1f5bcb5220429b8f","collectedfrom":[{"key":"dli_________::europe_pmc__","value":"Europe PMC","dataInfo":null}]}

View File

@ -95,7 +95,18 @@ public class PrepareRelationsJob {
readPathRelation(spark, inputRelationsPath) readPathRelation(spark, inputRelationsPath)
.filter( .filter(
(FilterFunction<SortableRelation>) (FilterFunction<SortableRelation>)
value -> value.getDataInfo().getDeletedbyinference() == false) r -> {
try {
return r != null
&& r.getDataInfo() != null
&& !r.getDataInfo().getDeletedbyinference();
} catch (NullPointerException e) {
log.info(
"invalid NPE '{}'",
OBJECT_MAPPER.writeValueAsString(r));
throw e;
}
})
.groupByKey( .groupByKey(
(MapFunction<SortableRelation, String>) value -> value.getSource(), (MapFunction<SortableRelation, String>) value -> value.getSource(),
Encoders.STRING()) Encoders.STRING())

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -41,6 +42,7 @@ import org.dom4j.io.XMLWriter;
public class XmlRecordFactory implements Serializable { public class XmlRecordFactory implements Serializable {
public static final String REL_SUBTYPE_DEDUP = "dedup";
private Map<String, LongAccumulator> accumulators; private Map<String, LongAccumulator> accumulators;
private Set<String> specialDatasourceTypes; private Set<String> specialDatasourceTypes;
@ -91,7 +93,14 @@ public class XmlRecordFactory implements Serializable {
// rels has to be processed before the contexts because they enrich the contextMap with // rels has to be processed before the contexts because they enrich the contextMap with
// the // the
// funding info. // funding info.
final List<String> relations = listRelations(je, templateFactory, contexts); final List<String> relations =
je.getLinks().stream()
.filter(
t ->
!REL_SUBTYPE_DEDUP.equalsIgnoreCase(
t.getRelation().getSubRelType()))
.map(link -> mapRelation(link, templateFactory, contexts))
.collect(Collectors.toCollection(ArrayList::new));
final String mainType = ModelSupport.getMainType(type); final String mainType = ModelSupport.getMainType(type);
metadata.addAll(buildContexts(mainType, contexts)); metadata.addAll(buildContexts(mainType, contexts));
@ -102,7 +111,7 @@ public class XmlRecordFactory implements Serializable {
mainType, mainType,
metadata, metadata,
relations, relations,
listChildren(entity, je.getEntity().getType(), templateFactory), listChildren(entity, je, templateFactory),
listExtraInfo(entity)); listExtraInfo(entity));
return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent);
@ -919,171 +928,149 @@ public class XmlRecordFactory implements Serializable {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType));
} }
private Qualifier getBestAccessright(final Result r) { private String mapRelation(Tuple2 link, TemplateFactory templateFactory, Set<String> contexts) {
Qualifier bestAccessRight = new Qualifier(); final Relation rel = link.getRelation();
bestAccessRight.setClassid("UNKNOWN"); final RelatedEntity re = link.getRelatedEntity();
bestAccessRight.setClassname("not available"); final String targetType = link.getRelatedEntity().getType();
bestAccessRight.setSchemeid("dnet:access_modes");
bestAccessRight.setSchemename("dnet:access_modes");
final LicenseComparator lc = new LicenseComparator(); final List<String> metadata = Lists.newArrayList();
for (final Instance instance : r.getInstance()) { switch (EntityType.valueOf(targetType)) {
if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) { case publication:
bestAccessRight = instance.getAccessright(); case dataset:
} case otherresearchproduct:
case software:
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
metadata.add(
XmlSerializationUtils.mapStructuredProperty("title", re.getTitle()));
}
if (isNotBlank(re.getDateofacceptance())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"dateofacceptance", re.getDateofacceptance()));
}
if (isNotBlank(re.getPublisher())) {
metadata.add(
XmlSerializationUtils.asXmlElement("publisher", re.getPublisher()));
}
if (isNotBlank(re.getCodeRepositoryUrl())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"coderepositoryurl", re.getCodeRepositoryUrl()));
}
if (re.getResulttype() != null & re.getResulttype().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
}
if (re.getCollectedfrom() != null) {
metadata.addAll(
re.getCollectedfrom().stream()
.map(
kv ->
XmlSerializationUtils.mapKeyValue(
"collectedfrom", kv))
.collect(Collectors.toList()));
}
if (re.getPid() != null) {
metadata.addAll(
re.getPid().stream()
.map(p -> XmlSerializationUtils.mapStructuredProperty("pid", p))
.collect(Collectors.toList()));
}
break;
case datasource:
if (isNotBlank(re.getOfficialname())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"officialname", re.getOfficialname()));
}
if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) {
mapDatasourceType(metadata, re.getDatasourcetype());
}
if (re.getOpenairecompatibility() != null
& !re.getOpenairecompatibility().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier(
"openairecompatibility", re.getOpenairecompatibility()));
}
break;
case organization:
if (isNotBlank(re.getLegalname())) {
metadata.add(
XmlSerializationUtils.asXmlElement("legalname", re.getLegalname()));
}
if (isNotBlank(re.getLegalshortname())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"legalshortname", re.getLegalshortname()));
}
if (re.getCountry() != null & !re.getCountry().isBlank()) {
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
}
break;
case project:
if (isNotBlank(re.getProjectTitle())) {
metadata.add(XmlSerializationUtils.asXmlElement("title", re.getProjectTitle()));
}
if (isNotBlank(re.getCode())) {
metadata.add(XmlSerializationUtils.asXmlElement("code", re.getCode()));
}
if (isNotBlank(re.getAcronym())) {
metadata.add(XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
}
if (re.getContracttype() != null & !re.getContracttype().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier(
"contracttype", re.getContracttype()));
}
if (re.getFundingtree() != null & contexts != null) {
metadata.addAll(
re.getFundingtree().stream()
.peek(ft -> fillContextMap(ft, contexts))
.map(ft -> getRelFundingTree(ft))
.collect(Collectors.toList()));
}
break;
default:
throw new IllegalArgumentException("invalid target type: " + targetType);
} }
return bestAccessRight; final DataInfo info = rel.getDataInfo();
} final String scheme = ModelSupport.getScheme(re.getType(), targetType);
private List<String> listRelations( if (StringUtils.isBlank(scheme)) {
final JoinedEntity je, TemplateFactory templateFactory, final Set<String> contexts) { throw new IllegalArgumentException(
final List<String> rels = Lists.newArrayList(); String.format("missing scheme for: <%s - %s>", re.getType(), targetType));
for (final Tuple2 link : je.getLinks()) {
final Relation rel = link.getRelation();
final RelatedEntity re = link.getRelatedEntity();
final String targetType = link.getRelatedEntity().getType();
final List<String> metadata = Lists.newArrayList();
switch (EntityType.valueOf(targetType)) {
case publication:
case dataset:
case otherresearchproduct:
case software:
if (re.getTitle() != null && isNotBlank(re.getTitle().getValue())) {
metadata.add(
XmlSerializationUtils.mapStructuredProperty(
"title", re.getTitle()));
}
if (isNotBlank(re.getDateofacceptance())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"dateofacceptance", re.getDateofacceptance()));
}
if (isNotBlank(re.getPublisher())) {
metadata.add(
XmlSerializationUtils.asXmlElement("publisher", re.getPublisher()));
}
if (isNotBlank(re.getCodeRepositoryUrl())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"coderepositoryurl", re.getCodeRepositoryUrl()));
}
if (re.getResulttype() != null & re.getResulttype().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier(
"resulttype", re.getResulttype()));
}
if (re.getCollectedfrom() != null) {
metadata.addAll(
re.getCollectedfrom().stream()
.map(
kv ->
XmlSerializationUtils.mapKeyValue(
"collectedfrom", kv))
.collect(Collectors.toList()));
}
if (re.getPid() != null) {
metadata.addAll(
re.getPid().stream()
.map(
p ->
XmlSerializationUtils.mapStructuredProperty(
"pid", p))
.collect(Collectors.toList()));
}
break;
case datasource:
if (isNotBlank(re.getOfficialname())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"officialname", re.getOfficialname()));
}
if (re.getDatasourcetype() != null & !re.getDatasourcetype().isBlank()) {
mapDatasourceType(metadata, re.getDatasourcetype());
}
if (re.getOpenairecompatibility() != null
& !re.getOpenairecompatibility().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier(
"openairecompatibility", re.getOpenairecompatibility()));
}
break;
case organization:
if (isNotBlank(re.getLegalname())) {
metadata.add(
XmlSerializationUtils.asXmlElement("legalname", re.getLegalname()));
}
if (isNotBlank(re.getLegalshortname())) {
metadata.add(
XmlSerializationUtils.asXmlElement(
"legalshortname", re.getLegalshortname()));
}
if (re.getCountry() != null & !re.getCountry().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier("country", re.getCountry()));
}
break;
case project:
if (isNotBlank(re.getProjectTitle())) {
metadata.add(
XmlSerializationUtils.asXmlElement("title", re.getProjectTitle()));
}
if (isNotBlank(re.getCode())) {
metadata.add(XmlSerializationUtils.asXmlElement("code", re.getCode()));
}
if (isNotBlank(re.getAcronym())) {
metadata.add(
XmlSerializationUtils.asXmlElement("acronym", re.getAcronym()));
}
if (re.getContracttype() != null & !re.getContracttype().isBlank()) {
metadata.add(
XmlSerializationUtils.mapQualifier(
"contracttype", re.getContracttype()));
}
if (re.getFundingtree() != null) {
metadata.addAll(
re.getFundingtree().stream()
.peek(ft -> fillContextMap(ft, contexts))
.map(ft -> getRelFundingTree(ft))
.collect(Collectors.toList()));
}
break;
default:
throw new IllegalArgumentException("invalid target type: " + targetType);
}
final DataInfo info = rel.getDataInfo();
final String scheme = ModelSupport.getScheme(re.getType(), targetType);
if (StringUtils.isBlank(scheme)) {
throw new IllegalArgumentException(
String.format("missing scheme for: <%s - %s>", re.getType(), targetType));
}
final String accumulatorName =
getRelDescriptor(rel.getRelType(), rel.getSubRelType(), rel.getRelClass());
if (accumulators.containsKey(accumulatorName)) {
accumulators.get(accumulatorName).add(1);
}
rels.add(
templateFactory.getRel(
targetType,
rel.getTarget(),
Sets.newHashSet(metadata),
rel.getRelClass(),
scheme,
info));
} }
return rels;
final String accumulatorName =
getRelDescriptor(rel.getRelType(), rel.getSubRelType(), rel.getRelClass());
if (accumulators.containsKey(accumulatorName)) {
accumulators.get(accumulatorName).add(1);
}
return templateFactory.getRel(
targetType,
rel.getTarget(),
Sets.newHashSet(metadata),
rel.getRelClass(),
scheme,
info);
} }
private List<String> listChildren( private List<String> listChildren(
final OafEntity entity, String type, TemplateFactory templateFactory) { final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) {
final List<String> children = Lists.newArrayList(); final List<String> children = Lists.newArrayList();
EntityType entityType = EntityType.valueOf(type); EntityType entityType = EntityType.valueOf(je.getEntity().getType());
children.addAll(
je.getLinks().stream()
.filter(
link ->
REL_SUBTYPE_DEDUP.equalsIgnoreCase(
link.getRelation().getSubRelType()))
.map(link -> mapRelation(link, templateFactory, null))
.collect(Collectors.toCollection(ArrayList::new)));
if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) { if (MainEntityType.result.toString().equals(ModelSupport.getMainType(entityType))) {
final List<Instance> instances = ((Result) entity).getInstance(); final List<Instance> instances = ((Result) entity).getInstance();
if (instances != null) { if (instances != null) {