common IdentifierFactory in use on the mapping from the aggregator data

This commit is contained in:
Claudio Atzori 2020-10-16 16:00:19 +02:00
parent c188868450
commit 34f1d0904b
5 changed files with 82 additions and 120 deletions

View File

@ -6,6 +6,8 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import java.util.*;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentFactory;
@ -133,20 +135,33 @@ public abstract class AbstractMdRecordToOafMapper {
final DataInfo info,
final long lastUpdateTimestamp) {
final List<Oaf> oafs = new ArrayList<>();
final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
final String id = IdentifierFactory.createIdentifier(entity);
if (!id.equals(entity.getId())) {
entity.getOriginalId().add(entity.getId());
entity.setId(id);
}
final List<Oaf> oafs = Lists.newArrayList(entity);
if (!oafs.isEmpty()) {
oafs.addAll(addProjectRels(doc, entity));
oafs.addAll(addOtherResultRels(doc, entity));
}
return oafs;
}
private OafEntity createEntity(Document doc, String type, List<Instance> instances, KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) {
switch (type.toLowerCase()) {
case "publication":
final Publication p = new Publication();
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
p.setJournal(prepareJournal(doc, info));
oafs.add(p);
break;
return p;
case "dataset":
final Dataset d = new Dataset();
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
d.setStoragedate(prepareDatasetStorageDate(doc, info));
d.setDevice(prepareDatasetDevice(doc, info));
d.setSize(prepareDatasetSize(doc, info));
@ -154,48 +169,34 @@ public abstract class AbstractMdRecordToOafMapper {
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
d.setGeolocation(prepareDatasetGeoLocations(doc, info));
oafs.add(d);
break;
return d;
case "software":
final Software s = new Software();
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
s.setLicense(prepareSoftwareLicenses(doc, info));
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
oafs.add(s);
break;
return s;
case "":
case "otherresearchproducts":
default:
final OtherResearchProduct o = new OtherResearchProduct();
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
o.setTool(prepareOtherResearchProductTools(doc, info));
oafs.add(o);
break;
return o;
}
if (!oafs.isEmpty()) {
oafs.addAll(addProjectRels(doc, collectedFrom, info, lastUpdateTimestamp));
oafs.addAll(addOtherResultRels(doc, collectedFrom, info, lastUpdateTimestamp));
}
return oafs;
}
private List<Oaf> addProjectRels(
final Document doc,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
final OafEntity entity) {
final List<Oaf> res = new ArrayList<>();
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
final String docId = entity.getId();
for (final Object o : doc.selectNodes("//oaf:projectid")) {
@ -207,13 +208,11 @@ public abstract class AbstractMdRecordToOafMapper {
res
.add(
getRelation(
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info,
lastUpdateTimestamp));
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity));
res
.add(
getRelation(
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info,
lastUpdateTimestamp));
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity));
}
}
@ -225,26 +224,22 @@ public abstract class AbstractMdRecordToOafMapper {
final String relType,
final String subRelType,
final String relClass,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
final OafEntity entity) {
final Relation rel = new Relation();
rel.setRelType(relType);
rel.setSubRelType(subRelType);
rel.setRelClass(relClass);
rel.setSource(source);
rel.setTarget(target);
rel.setCollectedfrom(Arrays.asList(collectedFrom));
rel.setDataInfo(info);
rel.setLastupdatetimestamp(lastUpdateTimestamp);
rel.setCollectedfrom(entity.getCollectedfrom());
rel.setDataInfo(entity.getDataInfo());
rel.setLastupdatetimestamp(entity.getLastupdatetimestamp());
return rel;
}
protected abstract List<Oaf> addOtherResultRels(
final Document doc,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp);
final OafEntity entity);
private void populateResultFields(
final Result r,
@ -257,7 +252,7 @@ public abstract class AbstractMdRecordToOafMapper {
r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
r.setOriginalId(Arrays.asList(findOriginalId(doc)));
r.setOriginalId(Lists.newArrayList(findOriginalId(doc)));
r.setCollectedfrom(Arrays.asList(collectedFrom));
r.setPid(prepareResultPids(doc, info));

View File

@ -445,22 +445,26 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
try {
final String targetType = rs.getString(TARGET_TYPE);
if (rs.getString(SOURCE_TYPE).equals("context")) {
final Result r;
if (rs.getString(TARGET_TYPE).equals("dataset")) {
switch (targetType) {
case "dataset":
r = new Dataset();
r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
} else if (rs.getString(TARGET_TYPE).equals("software")) {
break;
case "software":
r = new Software();
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
} else if (rs.getString(TARGET_TYPE).equals("other")) {
break;
case "other":
r = new OtherResearchProduct();
r.setResulttype(ORP_DEFAULT_RESULTTYPE);
} else {
break;
case "publication":
default:
r = new Publication();
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
break;
}
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
r.setLastupdatetimestamp(lastUpdateTimestamp);
r.setContext(prepareContext(rs.getString("source_id"), info));
@ -470,7 +474,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
return Arrays.asList(r);
} else {
final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false);
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
final Relation r1 = new Relation();
final Relation r2 = new Relation();

View File

@ -10,6 +10,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;
@ -19,15 +20,6 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@ -257,11 +249,9 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
@Override
protected List<Oaf> addOtherResultRels(
final Document doc,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
final OafEntity entity) {
final String docId = entity.getId();
final List<Oaf> res = new ArrayList<>();
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
@ -275,13 +265,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info,
lastUpdateTimestamp));
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info,
lastUpdateTimestamp));
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
}
}
return res;

View File

@ -12,6 +12,8 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Node;
@ -20,15 +22,6 @@ import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@ -314,11 +307,9 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
@Override
protected List<Oaf> addOtherResultRels(
final Document doc,
final KeyValue collectedFrom,
final DataInfo info,
final long lastUpdateTimestamp) {
final OafEntity entity) {
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
final String docId = entity.getId();
final List<Oaf> res = new ArrayList<>();
@ -330,30 +321,26 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
final String otherId = createOpenaireId(50, originalId, false);
final String type = ((Node) o).valueOf("@relationType");
if (type.equalsIgnoreCase("IsSupplementTo")) {
if (type.equalsIgnoreCase(IS_SUPPLEMENT_TO)) {
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info,
lastUpdateTimestamp));
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info,
lastUpdateTimestamp));
} else if (type.equals("IsPartOf")) {
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, entity));
} else if (type.equalsIgnoreCase(IS_PART_OF)) {
res
.add(
getRelation(
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info,
lastUpdateTimestamp));
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, entity));
res
.add(
getRelation(
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info,
lastUpdateTimestamp));
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, entity));
} else {
// TODO catch more semantics
}
}
}

View File

@ -11,6 +11,8 @@ import java.io.IOException;
import java.util.List;
import java.util.Optional;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.BeforeEach;
@ -24,14 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Author;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ExtendWith(MockitoExtension.class)
@ -71,7 +65,7 @@ public class MappersTest {
assertValidId(p.getId());
assertTrue(p.getOriginalId().size() == 1);
assertTrue(p.getOriginalId().size() == 2);
assertEquals("10.3897/oneeco.2.e13718", p.getOriginalId().get(0));
assertValidId(p.getCollectedfrom().get(0).getKey());
@ -123,22 +117,7 @@ public class MappersTest {
assertNotNull(p.getBestaccessright());
assertEquals("OPEN", p.getBestaccessright().getClassid());
assertValidId(r1.getSource());
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());
assertNotNull(r2.getDataInfo().getTrust());
assertEquals(r1.getSource(), r2.getTarget());
assertEquals(r2.getSource(), r1.getTarget());
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
verifyRelations(p, r1, r2);
// System.out.println(new ObjectMapper().writeValueAsString(p));
// System.out.println(new ObjectMapper().writeValueAsString(r1));
@ -177,7 +156,7 @@ public class MappersTest {
final Relation r2 = (Relation) list.get(2);
assertValidId(d.getId());
assertTrue(d.getOriginalId().size() == 1);
assertTrue(d.getOriginalId().size() == 2);
assertEquals("oai:zenodo.org:3234526", d.getOriginalId().get(0));
assertValidId(d.getCollectedfrom().get(0).getKey());
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
@ -230,10 +209,19 @@ public class MappersTest {
});
assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid());
verifyRelations(d, r1, r2);
}
private void verifyRelations(OafEntity e, Relation r1, Relation r2) {
assertEquals(e.getId(), r1.getSource());
assertEquals(e.getId(), r2.getTarget());
assertValidId(r1.getSource());
assertValidId(r1.getTarget());
assertValidId(r2.getSource());
assertValidId(r2.getTarget());
assertValidId(r1.getCollectedfrom().get(0).getKey());
assertValidId(r2.getCollectedfrom().get(0).getKey());
assertNotNull(r1.getDataInfo());
assertNotNull(r2.getDataInfo());
assertNotNull(r1.getDataInfo().getTrust());