common IdentifierFactory in use on the mapping from the aggregator data

This commit is contained in:
Claudio Atzori 2020-10-16 16:00:19 +02:00
parent c188868450
commit 34f1d0904b
5 changed files with 82 additions and 120 deletions

View File

@ -6,6 +6,8 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.*; import java.util.*;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentFactory; import org.dom4j.DocumentFactory;
@ -133,20 +135,33 @@ public abstract class AbstractMdRecordToOafMapper {
final DataInfo info, final DataInfo info,
final long lastUpdateTimestamp) { final long lastUpdateTimestamp) {
final List<Oaf> oafs = new ArrayList<>(); final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
final String id = IdentifierFactory.createIdentifier(entity);
if (!id.equals(entity.getId())) {
entity.getOriginalId().add(entity.getId());
entity.setId(id);
}
final List<Oaf> oafs = Lists.newArrayList(entity);
if (!oafs.isEmpty()) {
oafs.addAll(addProjectRels(doc, entity));
oafs.addAll(addOtherResultRels(doc, entity));
}
return oafs;
}
private OafEntity createEntity(Document doc, String type, List<Instance> instances, KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) {
switch (type.toLowerCase()) { switch (type.toLowerCase()) {
case "publication": case "publication":
final Publication p = new Publication(); final Publication p = new Publication();
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
p.setJournal(prepareJournal(doc, info)); p.setJournal(prepareJournal(doc, info));
oafs.add(p); return p;
break;
case "dataset": case "dataset":
final Dataset d = new Dataset(); final Dataset d = new Dataset();
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
d.setStoragedate(prepareDatasetStorageDate(doc, info)); d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info)); d.setDevice(prepareDatasetDevice(doc, info));
d.setSize(prepareDatasetSize(doc, info)); d.setSize(prepareDatasetSize(doc, info));
@ -154,48 +169,34 @@ public abstract class AbstractMdRecordToOafMapper {
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info)); d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info)); d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
d.setGeolocation(prepareDatasetGeoLocations(doc, info)); d.setGeolocation(prepareDatasetGeoLocations(doc, info));
oafs.add(d); return d;
break;
case "software": case "software":
final Software s = new Software(); final Software s = new Software();
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info)); s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info)); s.setLicense(prepareSoftwareLicenses(doc, info));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info)); s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info)); s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
oafs.add(s); return s;
break;
case "": case "":
case "otherresearchproducts": case "otherresearchproducts":
default: default:
final OtherResearchProduct o = new OtherResearchProduct(); final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp); populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info)); o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info)); o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
o.setTool(prepareOtherResearchProductTools(doc, info)); o.setTool(prepareOtherResearchProductTools(doc, info));
oafs.add(o); return o;
break;
} }
if (!oafs.isEmpty()) {
oafs.addAll(addProjectRels(doc, collectedFrom, info, lastUpdateTimestamp));
oafs.addAll(addOtherResultRels(doc, collectedFrom, info, lastUpdateTimestamp));
}
return oafs;
} }
private List<Oaf> addProjectRels( private List<Oaf> addProjectRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final OafEntity entity) {
final DataInfo info,
final long lastUpdateTimestamp) {
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); final String docId = entity.getId();
for (final Object o : doc.selectNodes("//oaf:projectid")) { for (final Object o : doc.selectNodes("//oaf:projectid")) {
@ -207,13 +208,11 @@ public abstract class AbstractMdRecordToOafMapper {
res res
.add( .add(
getRelation( getRelation(
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info, docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity));
lastUpdateTimestamp));
res res
.add( .add(
getRelation( getRelation(
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info, projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity));
lastUpdateTimestamp));
} }
} }
@ -225,26 +224,22 @@ public abstract class AbstractMdRecordToOafMapper {
final String relType, final String relType,
final String subRelType, final String subRelType,
final String relClass, final String relClass,
final KeyValue collectedFrom, final OafEntity entity) {
final DataInfo info,
final long lastUpdateTimestamp) {
final Relation rel = new Relation(); final Relation rel = new Relation();
rel.setRelType(relType); rel.setRelType(relType);
rel.setSubRelType(subRelType); rel.setSubRelType(subRelType);
rel.setRelClass(relClass); rel.setRelClass(relClass);
rel.setSource(source); rel.setSource(source);
rel.setTarget(target); rel.setTarget(target);
rel.setCollectedfrom(Arrays.asList(collectedFrom)); rel.setCollectedfrom(entity.getCollectedfrom());
rel.setDataInfo(info); rel.setDataInfo(entity.getDataInfo());
rel.setLastupdatetimestamp(lastUpdateTimestamp); rel.setLastupdatetimestamp(entity.getLastupdatetimestamp());
return rel; return rel;
} }
protected abstract List<Oaf> addOtherResultRels( protected abstract List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final OafEntity entity);
final DataInfo info,
final long lastUpdateTimestamp);
private void populateResultFields( private void populateResultFields(
final Result r, final Result r,
@ -257,7 +252,7 @@ public abstract class AbstractMdRecordToOafMapper {
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false)); r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
r.setOriginalId(Arrays.asList(findOriginalId(doc))); r.setOriginalId(Lists.newArrayList(findOriginalId(doc)));
r.setCollectedfrom(Arrays.asList(collectedFrom)); r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setPid(prepareResultPids(doc, info)); r.setPid(prepareResultPids(doc, info));

View File

@ -445,22 +445,26 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try { try {
final String targetType = rs.getString(TARGET_TYPE);
if (rs.getString(SOURCE_TYPE).equals("context")) { if (rs.getString(SOURCE_TYPE).equals("context")) {
final Result r; final Result r;
if (rs.getString(TARGET_TYPE).equals("dataset")) { switch (targetType) {
r = new Dataset(); case "dataset":
r.setResulttype(DATASET_DEFAULT_RESULTTYPE); r = new Dataset();
} else if (rs.getString(TARGET_TYPE).equals("software")) { break;
r = new Software(); case "software":
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE); r = new Software();
} else if (rs.getString(TARGET_TYPE).equals("other")) { break;
r = new OtherResearchProduct(); case "other":
r.setResulttype(ORP_DEFAULT_RESULTTYPE); r = new OtherResearchProduct();
} else { break;
r = new Publication(); case "publication":
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE); default:
r = new Publication();
break;
} }
r.setId(createOpenaireId(50, rs.getString("target_id"), false)); r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp); r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), info)); r.setContext(prepareContext(rs.getString("source_id"), info));
@ -470,7 +474,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return Arrays.asList(r); return Arrays.asList(r);
} else { } else {
final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false); final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false); final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
final Relation r1 = new Relation(); final Relation r1 = new Relation();
final Relation r2 = new Relation(); final Relation r2 = new Relation();

View File

@ -10,6 +10,7 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Element; import org.dom4j.Element;
@ -19,15 +20,6 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OafToOafMapper extends AbstractMdRecordToOafMapper { public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@ -256,12 +248,10 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<Oaf> addOtherResultRels( protected List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final OafEntity entity) {
final DataInfo info,
final long lastUpdateTimestamp) {
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
final String docId = entity.getId();
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) { for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
@ -275,13 +265,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
res res
.add( .add(
getRelation( getRelation(
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info, docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
lastUpdateTimestamp));
res res
.add( .add(
getRelation( getRelation(
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info, otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
lastUpdateTimestamp));
} }
} }
return res; return res;

View File

@ -12,6 +12,8 @@ import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
@ -20,15 +22,6 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper { public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@ -313,12 +306,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override @Override
protected List<Oaf> addOtherResultRels( protected List<Oaf> addOtherResultRels(
final Document doc, final Document doc,
final KeyValue collectedFrom, final OafEntity entity) {
final DataInfo info,
final long lastUpdateTimestamp) {
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false); final String docId = entity.getId();
final List<Oaf> res = new ArrayList<>(); final List<Oaf> res = new ArrayList<>();
@ -330,30 +321,26 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final String otherId = createOpenaireId(50, originalId, false); final String otherId = createOpenaireId(50, originalId, false);
final String type = ((Node) o).valueOf("@relationType"); final String type = ((Node) o).valueOf("@relationType");
if (type.equalsIgnoreCase("IsSupplementTo")) { if (type.equalsIgnoreCase(IS_SUPPLEMENT_TO)) {
res res
.add( .add(
getRelation( getRelation(
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info, docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, entity));
lastUpdateTimestamp));
res res
.add( .add(
getRelation( getRelation(
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info, otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, entity));
lastUpdateTimestamp)); } else if (type.equalsIgnoreCase(IS_PART_OF)) {
} else if (type.equals("IsPartOf")) {
res res
.add( .add(
getRelation( getRelation(
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info, docId, otherId, RESULT_RESULT, PART, IS_PART_OF, entity));
lastUpdateTimestamp));
res res
.add( .add(
getRelation( getRelation(
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info, otherId, docId, RESULT_RESULT, PART, HAS_PARTS, entity));
lastUpdateTimestamp));
} else { } else {
// TODO catch more semantics
} }
} }
} }

View File

@ -11,6 +11,8 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
@ -24,14 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest; import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup; import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
@ -71,7 +65,7 @@ public class MappersTest {
assertValidId(p.getId()); assertValidId(p.getId());
assertTrue(p.getOriginalId().size() == 1); assertTrue(p.getOriginalId().size() == 2);
assertEquals("10.3897/oneeco.2.e13718", p.getOriginalId().get(0)); assertEquals("10.3897/oneeco.2.e13718", p.getOriginalId().get(0));
assertValidId(p.getCollectedfrom().get(0).getKey()); assertValidId(p.getCollectedfrom().get(0).getKey());
@ -123,22 +117,7 @@ public class MappersTest {
assertNotNull(p.getBestaccessright()); assertNotNull(p.getBestaccessright());
assertEquals("OPEN", p.getBestaccessright().getClassid()); assertEquals("OPEN", p.getBestaccessright().getClassid());
assertValidId(r1.getSource()); verifyRelations(p, r1, r2);
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
// System.out.println(new ObjectMapper().writeValueAsString(p)); // System.out.println(new ObjectMapper().writeValueAsString(p));
// System.out.println(new ObjectMapper().writeValueAsString(r1)); // System.out.println(new ObjectMapper().writeValueAsString(r1));
@ -177,7 +156,7 @@ public class MappersTest {
final Relation r2 = (Relation) list.get(2); final Relation r2 = (Relation) list.get(2);
assertValidId(d.getId()); assertValidId(d.getId());
assertTrue(d.getOriginalId().size() == 1); assertTrue(d.getOriginalId().size() == 2);
assertEquals("oai:zenodo.org:3234526", d.getOriginalId().get(0)); assertEquals("oai:zenodo.org:3234526", d.getOriginalId().get(0));
assertValidId(d.getCollectedfrom().get(0).getKey()); assertValidId(d.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue())); assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
@ -230,10 +209,19 @@ public class MappersTest {
}); });
assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid()); assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid());
verifyRelations(d, r1, r2);
}
private void verifyRelations(OafEntity e, Relation r1, Relation r2) {
assertEquals(e.getId(), r1.getSource());
assertEquals(e.getId(), r2.getTarget());
assertValidId(r1.getSource()); assertValidId(r1.getSource());
assertValidId(r1.getTarget()); assertValidId(r1.getTarget());
assertValidId(r2.getSource()); assertValidId(r2.getSource());
assertValidId(r2.getTarget()); assertValidId(r2.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getDataInfo()); assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo()); assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust()); assertNotNull(r1.getDataInfo().getTrust());