forked from D-Net/dnet-hadoop
common IdentifierFactory in use on the mapping from the aggregator data
This commit is contained in:
parent
c188868450
commit
34f1d0904b
|
@ -6,6 +6,8 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentFactory;
|
||||
|
@ -133,20 +135,33 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
|
||||
final List<Oaf> oafs = new ArrayList<>();
|
||||
final OafEntity entity = createEntity(doc, type, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
final String id = IdentifierFactory.createIdentifier(entity);
|
||||
if (!id.equals(entity.getId())) {
|
||||
entity.getOriginalId().add(entity.getId());
|
||||
entity.setId(id);
|
||||
}
|
||||
|
||||
final List<Oaf> oafs = Lists.newArrayList(entity);
|
||||
|
||||
if (!oafs.isEmpty()) {
|
||||
oafs.addAll(addProjectRels(doc, entity));
|
||||
oafs.addAll(addOtherResultRels(doc, entity));
|
||||
}
|
||||
|
||||
return oafs;
|
||||
}
|
||||
|
||||
private OafEntity createEntity(Document doc, String type, List<Instance> instances, KeyValue collectedFrom, DataInfo info, long lastUpdateTimestamp) {
|
||||
switch (type.toLowerCase()) {
|
||||
case "publication":
|
||||
final Publication p = new Publication();
|
||||
populateResultFields(p, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
p.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
||||
p.setJournal(prepareJournal(doc, info));
|
||||
oafs.add(p);
|
||||
break;
|
||||
return p;
|
||||
case "dataset":
|
||||
final Dataset d = new Dataset();
|
||||
populateResultFields(d, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
d.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
||||
d.setStoragedate(prepareDatasetStorageDate(doc, info));
|
||||
d.setDevice(prepareDatasetDevice(doc, info));
|
||||
d.setSize(prepareDatasetSize(doc, info));
|
||||
|
@ -154,48 +169,34 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
d.setLastmetadataupdate(prepareDatasetLastMetadataUpdate(doc, info));
|
||||
d.setMetadataversionnumber(prepareDatasetMetadataVersionNumber(doc, info));
|
||||
d.setGeolocation(prepareDatasetGeoLocations(doc, info));
|
||||
oafs.add(d);
|
||||
break;
|
||||
return d;
|
||||
case "software":
|
||||
final Software s = new Software();
|
||||
populateResultFields(s, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
s.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
||||
s.setDocumentationUrl(prepareSoftwareDocumentationUrls(doc, info));
|
||||
s.setLicense(prepareSoftwareLicenses(doc, info));
|
||||
s.setCodeRepositoryUrl(prepareSoftwareCodeRepositoryUrl(doc, info));
|
||||
s.setProgrammingLanguage(prepareSoftwareProgrammingLanguage(doc, info));
|
||||
oafs.add(s);
|
||||
break;
|
||||
return s;
|
||||
case "":
|
||||
case "otherresearchproducts":
|
||||
default:
|
||||
final OtherResearchProduct o = new OtherResearchProduct();
|
||||
populateResultFields(o, doc, instances, collectedFrom, info, lastUpdateTimestamp);
|
||||
o.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
||||
o.setContactperson(prepareOtherResearchProductContactPersons(doc, info));
|
||||
o.setContactgroup(prepareOtherResearchProductContactGroups(doc, info));
|
||||
o.setTool(prepareOtherResearchProductTools(doc, info));
|
||||
oafs.add(o);
|
||||
break;
|
||||
return o;
|
||||
}
|
||||
|
||||
if (!oafs.isEmpty()) {
|
||||
oafs.addAll(addProjectRels(doc, collectedFrom, info, lastUpdateTimestamp));
|
||||
oafs.addAll(addOtherResultRels(doc, collectedFrom, info, lastUpdateTimestamp));
|
||||
}
|
||||
|
||||
return oafs;
|
||||
}
|
||||
|
||||
private List<Oaf> addProjectRels(
|
||||
final Document doc,
|
||||
final KeyValue collectedFrom,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
final OafEntity entity) {
|
||||
|
||||
final List<Oaf> res = new ArrayList<>();
|
||||
|
||||
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
||||
final String docId = entity.getId();
|
||||
|
||||
for (final Object o : doc.selectNodes("//oaf:projectid")) {
|
||||
|
||||
|
@ -207,13 +208,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
res
|
||||
.add(
|
||||
getRelation(
|
||||
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
docId, projectId, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY, entity));
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
projectId, docId, RESULT_PROJECT, OUTCOME, PRODUCES, entity));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -225,26 +224,22 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
final String relType,
|
||||
final String subRelType,
|
||||
final String relClass,
|
||||
final KeyValue collectedFrom,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
final OafEntity entity) {
|
||||
final Relation rel = new Relation();
|
||||
rel.setRelType(relType);
|
||||
rel.setSubRelType(subRelType);
|
||||
rel.setRelClass(relClass);
|
||||
rel.setSource(source);
|
||||
rel.setTarget(target);
|
||||
rel.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||
rel.setDataInfo(info);
|
||||
rel.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
rel.setCollectedfrom(entity.getCollectedfrom());
|
||||
rel.setDataInfo(entity.getDataInfo());
|
||||
rel.setLastupdatetimestamp(entity.getLastupdatetimestamp());
|
||||
return rel;
|
||||
}
|
||||
|
||||
protected abstract List<Oaf> addOtherResultRels(
|
||||
final Document doc,
|
||||
final KeyValue collectedFrom,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp);
|
||||
final OafEntity entity);
|
||||
|
||||
private void populateResultFields(
|
||||
final Result r,
|
||||
|
@ -257,7 +252,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
|||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
|
||||
|
||||
r.setOriginalId(Arrays.asList(findOriginalId(doc)));
|
||||
r.setOriginalId(Lists.newArrayList(findOriginalId(doc)));
|
||||
|
||||
r.setCollectedfrom(Arrays.asList(collectedFrom));
|
||||
r.setPid(prepareResultPids(doc, info));
|
||||
|
|
|
@ -445,22 +445,26 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE");
|
||||
|
||||
try {
|
||||
final String targetType = rs.getString(TARGET_TYPE);
|
||||
if (rs.getString(SOURCE_TYPE).equals("context")) {
|
||||
final Result r;
|
||||
|
||||
if (rs.getString(TARGET_TYPE).equals("dataset")) {
|
||||
r = new Dataset();
|
||||
r.setResulttype(DATASET_DEFAULT_RESULTTYPE);
|
||||
} else if (rs.getString(TARGET_TYPE).equals("software")) {
|
||||
r = new Software();
|
||||
r.setResulttype(SOFTWARE_DEFAULT_RESULTTYPE);
|
||||
} else if (rs.getString(TARGET_TYPE).equals("other")) {
|
||||
r = new OtherResearchProduct();
|
||||
r.setResulttype(ORP_DEFAULT_RESULTTYPE);
|
||||
} else {
|
||||
r = new Publication();
|
||||
r.setResulttype(PUBLICATION_DEFAULT_RESULTTYPE);
|
||||
switch (targetType) {
|
||||
case "dataset":
|
||||
r = new Dataset();
|
||||
break;
|
||||
case "software":
|
||||
r = new Software();
|
||||
break;
|
||||
case "other":
|
||||
r = new OtherResearchProduct();
|
||||
break;
|
||||
case "publication":
|
||||
default:
|
||||
r = new Publication();
|
||||
break;
|
||||
}
|
||||
|
||||
r.setId(createOpenaireId(50, rs.getString("target_id"), false));
|
||||
r.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
r.setContext(prepareContext(rs.getString("source_id"), info));
|
||||
|
@ -470,7 +474,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
return Arrays.asList(r);
|
||||
} else {
|
||||
final String sourceId = createOpenaireId(rs.getString(SOURCE_TYPE), rs.getString("source_id"), false);
|
||||
final String targetId = createOpenaireId(rs.getString(TARGET_TYPE), rs.getString("target_id"), false);
|
||||
final String targetId = createOpenaireId(targetType, rs.getString("target_id"), false);
|
||||
|
||||
final Relation r1 = new Relation();
|
||||
final Relation r2 = new Relation();
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.Element;
|
||||
|
@ -19,15 +20,6 @@ import com.google.common.collect.Lists;
|
|||
|
||||
import eu.dnetlib.dhp.common.PacePerson;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||
|
||||
|
@ -256,12 +248,10 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
|
||||
@Override
|
||||
protected List<Oaf> addOtherResultRels(
|
||||
final Document doc,
|
||||
final KeyValue collectedFrom,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
||||
final Document doc,
|
||||
final OafEntity entity) {
|
||||
|
||||
final String docId = entity.getId();
|
||||
final List<Oaf> res = new ArrayList<>();
|
||||
|
||||
for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
|
||||
|
@ -275,13 +265,11 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
res
|
||||
.add(
|
||||
getRelation(
|
||||
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
|
|
@ -12,6 +12,8 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.Node;
|
||||
|
@ -20,15 +22,6 @@ import com.google.common.collect.Lists;
|
|||
|
||||
import eu.dnetlib.dhp.common.PacePerson;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||
|
||||
|
@ -313,12 +306,10 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
|
||||
@Override
|
||||
protected List<Oaf> addOtherResultRels(
|
||||
final Document doc,
|
||||
final KeyValue collectedFrom,
|
||||
final DataInfo info,
|
||||
final long lastUpdateTimestamp) {
|
||||
final Document doc,
|
||||
final OafEntity entity) {
|
||||
|
||||
final String docId = createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false);
|
||||
final String docId = entity.getId();
|
||||
|
||||
final List<Oaf> res = new ArrayList<>();
|
||||
|
||||
|
@ -330,30 +321,26 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
final String otherId = createOpenaireId(50, originalId, false);
|
||||
final String type = ((Node) o).valueOf("@relationType");
|
||||
|
||||
if (type.equalsIgnoreCase("IsSupplementTo")) {
|
||||
if (type.equalsIgnoreCase(IS_SUPPLEMENT_TO)) {
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
docId, otherId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENT_TO, entity));
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
} else if (type.equals("IsPartOf")) {
|
||||
|
||||
otherId, docId, RESULT_RESULT, SUPPLEMENT, IS_SUPPLEMENTED_BY, entity));
|
||||
} else if (type.equalsIgnoreCase(IS_PART_OF)) {
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
docId, otherId, RESULT_RESULT, PART, IS_PART_OF, entity));
|
||||
res
|
||||
.add(
|
||||
getRelation(
|
||||
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, collectedFrom, info,
|
||||
lastUpdateTimestamp));
|
||||
otherId, docId, RESULT_RESULT, PART, HAS_PARTS, entity));
|
||||
} else {
|
||||
// TODO catch more semantics
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
@ -24,14 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
import eu.dnetlib.dhp.oa.graph.clean.CleaningFunctionTest;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
|
@ -71,7 +65,7 @@ public class MappersTest {
|
|||
|
||||
assertValidId(p.getId());
|
||||
|
||||
assertTrue(p.getOriginalId().size() == 1);
|
||||
assertTrue(p.getOriginalId().size() == 2);
|
||||
assertEquals("10.3897/oneeco.2.e13718", p.getOriginalId().get(0));
|
||||
|
||||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||
|
@ -123,22 +117,7 @@ public class MappersTest {
|
|||
|
||||
assertNotNull(p.getBestaccessright());
|
||||
assertEquals("OPEN", p.getBestaccessright().getClassid());
|
||||
assertValidId(r1.getSource());
|
||||
assertValidId(r1.getTarget());
|
||||
assertValidId(r2.getSource());
|
||||
assertValidId(r2.getTarget());
|
||||
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
||||
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
||||
assertNotNull(r1.getDataInfo());
|
||||
assertNotNull(r2.getDataInfo());
|
||||
assertNotNull(r1.getDataInfo().getTrust());
|
||||
assertNotNull(r2.getDataInfo().getTrust());
|
||||
assertEquals(r1.getSource(), r2.getTarget());
|
||||
assertEquals(r2.getSource(), r1.getTarget());
|
||||
assertTrue(StringUtils.isNotBlank(r1.getRelClass()));
|
||||
assertTrue(StringUtils.isNotBlank(r2.getRelClass()));
|
||||
assertTrue(StringUtils.isNotBlank(r1.getRelType()));
|
||||
assertTrue(StringUtils.isNotBlank(r2.getRelType()));
|
||||
verifyRelations(p, r1, r2);
|
||||
|
||||
// System.out.println(new ObjectMapper().writeValueAsString(p));
|
||||
// System.out.println(new ObjectMapper().writeValueAsString(r1));
|
||||
|
@ -177,7 +156,7 @@ public class MappersTest {
|
|||
final Relation r2 = (Relation) list.get(2);
|
||||
|
||||
assertValidId(d.getId());
|
||||
assertTrue(d.getOriginalId().size() == 1);
|
||||
assertTrue(d.getOriginalId().size() == 2);
|
||||
assertEquals("oai:zenodo.org:3234526", d.getOriginalId().get(0));
|
||||
assertValidId(d.getCollectedfrom().get(0).getKey());
|
||||
assertTrue(StringUtils.isNotBlank(d.getTitle().get(0).getValue()));
|
||||
|
@ -230,10 +209,19 @@ public class MappersTest {
|
|||
});
|
||||
assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid());
|
||||
|
||||
verifyRelations(d, r1, r2);
|
||||
}
|
||||
|
||||
private void verifyRelations(OafEntity e, Relation r1, Relation r2) {
|
||||
assertEquals(e.getId(), r1.getSource());
|
||||
assertEquals(e.getId(), r2.getTarget());
|
||||
|
||||
assertValidId(r1.getSource());
|
||||
assertValidId(r1.getTarget());
|
||||
assertValidId(r2.getSource());
|
||||
assertValidId(r2.getTarget());
|
||||
assertValidId(r1.getCollectedfrom().get(0).getKey());
|
||||
assertValidId(r2.getCollectedfrom().get(0).getKey());
|
||||
assertNotNull(r1.getDataInfo());
|
||||
assertNotNull(r2.getDataInfo());
|
||||
assertNotNull(r1.getDataInfo().getTrust());
|
||||
|
|
Loading…
Reference in New Issue