forked from D-Net/dnet-hadoop
mapping datasource.journal only when an issn is available, null otherwhise
This commit is contained in:
parent
d1cbee8413
commit
d4c3476152
|
@ -0,0 +1,368 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
public class OafMapperUtils {
|
||||
|
||||
public static Oaf merge(final Oaf left, final Oaf right) {
|
||||
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
||||
return mergeEntities((OafEntity) left, (OafEntity) right);
|
||||
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
||||
((Relation) left).mergeFrom((Relation) right);
|
||||
} else {
|
||||
throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
|
||||
if (ModelSupport.isSubClass(left, Result.class)) {
|
||||
return mergeResults((Result) left, (Result) right);
|
||||
} else if (ModelSupport.isSubClass(left, Datasource.class)) {
|
||||
((Datasource) left).mergeFrom((Datasource) right);
|
||||
} else if (ModelSupport.isSubClass(left, Organization.class)) {
|
||||
((Organization) left).mergeFrom((Organization) right);
|
||||
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
||||
((Project) left).mergeFrom((Project) right);
|
||||
} else {
|
||||
throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
public static Result mergeResults(Result left, Result right) {
|
||||
if (new ResultTypeComparator().compare(left, right) < 0) {
|
||||
left.mergeFrom(right);
|
||||
return left;
|
||||
} else {
|
||||
right.mergeFrom(left);
|
||||
return right;
|
||||
}
|
||||
}
|
||||
|
||||
public static KeyValue keyValue(final String k, final String v) {
|
||||
final KeyValue kv = new KeyValue();
|
||||
kv.setKey(k);
|
||||
kv.setValue(v);
|
||||
return kv;
|
||||
}
|
||||
|
||||
public static List<KeyValue> listKeyValues(final String... s) {
|
||||
if (s.length % 2 > 0) {
|
||||
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)");
|
||||
}
|
||||
|
||||
final List<KeyValue> list = new ArrayList<>();
|
||||
for (int i = 0; i < s.length; i += 2) {
|
||||
list.add(keyValue(s[i], s[i + 1]));
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
public static <T> Field<T> field(final T value, final DataInfo info) {
|
||||
if (value == null || StringUtils.isBlank(value.toString())) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final Field<T> field = new Field<>();
|
||||
field.setValue(value);
|
||||
field.setDataInfo(info);
|
||||
return field;
|
||||
}
|
||||
|
||||
public static List<Field<String>> listFields(final DataInfo info, final String... values) {
|
||||
return Arrays
|
||||
.stream(values)
|
||||
.map(v -> field(v, info))
|
||||
.filter(Objects::nonNull)
|
||||
.filter(distinctByKey(f -> f.getValue()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static List<Field<String>> listFields(final DataInfo info, final List<String> values) {
|
||||
return values
|
||||
.stream()
|
||||
.map(v -> field(v, info))
|
||||
.filter(Objects::nonNull)
|
||||
.filter(distinctByKey(f -> f.getValue()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
||||
}
|
||||
|
||||
public static AccessRight accessRight(
|
||||
final String classid,
|
||||
final String classname,
|
||||
final String schemeid,
|
||||
final String schemename) {
|
||||
return accessRight(classid, classname, schemeid, schemename, null);
|
||||
}
|
||||
|
||||
public static AccessRight accessRight(
|
||||
final String classid,
|
||||
final String classname,
|
||||
final String schemeid,
|
||||
final String schemename,
|
||||
final OpenAccessRoute openAccessRoute) {
|
||||
final AccessRight accessRight = new AccessRight();
|
||||
accessRight.setClassid(classid);
|
||||
accessRight.setClassname(classname);
|
||||
accessRight.setSchemeid(schemeid);
|
||||
accessRight.setSchemename(schemename);
|
||||
accessRight.setOpenAccessRoute(openAccessRoute);
|
||||
return accessRight;
|
||||
}
|
||||
|
||||
public static Qualifier qualifier(
|
||||
final String classid,
|
||||
final String classname,
|
||||
final String schemeid,
|
||||
final String schemename) {
|
||||
final Qualifier q = new Qualifier();
|
||||
q.setClassid(classid);
|
||||
q.setClassname(classname);
|
||||
q.setSchemeid(schemeid);
|
||||
q.setSchemename(schemename);
|
||||
return q;
|
||||
}
|
||||
|
||||
public static Qualifier qualifier(final Qualifier qualifier) {
|
||||
final Qualifier q = new Qualifier();
|
||||
q.setClassid(qualifier.getClassid());
|
||||
q.setClassname(qualifier.getClassname());
|
||||
q.setSchemeid(qualifier.getSchemeid());
|
||||
q.setSchemename(qualifier.getSchemename());
|
||||
return q;
|
||||
}
|
||||
|
||||
public static StructuredProperty structuredProperty(
|
||||
final String value,
|
||||
final String classid,
|
||||
final String classname,
|
||||
final String schemeid,
|
||||
final String schemename,
|
||||
final DataInfo dataInfo) {
|
||||
|
||||
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
|
||||
}
|
||||
|
||||
public static StructuredProperty structuredProperty(
|
||||
final String value,
|
||||
final Qualifier qualifier,
|
||||
final DataInfo dataInfo) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
final StructuredProperty sp = new StructuredProperty();
|
||||
sp.setValue(value);
|
||||
sp.setQualifier(qualifier);
|
||||
sp.setDataInfo(dataInfo);
|
||||
return sp;
|
||||
}
|
||||
|
||||
public static ExtraInfo extraInfo(
|
||||
final String name,
|
||||
final String value,
|
||||
final String typology,
|
||||
final String provenance,
|
||||
final String trust) {
|
||||
final ExtraInfo info = new ExtraInfo();
|
||||
info.setName(name);
|
||||
info.setValue(value);
|
||||
info.setTypology(typology);
|
||||
info.setProvenance(provenance);
|
||||
info.setTrust(trust);
|
||||
return info;
|
||||
}
|
||||
|
||||
public static OAIProvenance oaiIProvenance(
|
||||
final String identifier,
|
||||
final String baseURL,
|
||||
final String metadataNamespace,
|
||||
final Boolean altered,
|
||||
final String datestamp,
|
||||
final String harvestDate) {
|
||||
|
||||
final OriginDescription desc = new OriginDescription();
|
||||
desc.setIdentifier(identifier);
|
||||
desc.setBaseURL(baseURL);
|
||||
desc.setMetadataNamespace(metadataNamespace);
|
||||
desc.setAltered(altered);
|
||||
desc.setDatestamp(datestamp);
|
||||
desc.setHarvestDate(harvestDate);
|
||||
|
||||
final OAIProvenance p = new OAIProvenance();
|
||||
p.setOriginDescription(desc);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
public static Journal journal(
|
||||
final String name,
|
||||
final String issnPrinted,
|
||||
final String issnOnline,
|
||||
final String issnLinking,
|
||||
final DataInfo dataInfo) {
|
||||
|
||||
return hasIssn(issnPrinted, issnOnline, issnLinking) ? journal(
|
||||
name,
|
||||
issnPrinted,
|
||||
issnOnline,
|
||||
issnLinking,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
dataInfo) : null;
|
||||
}
|
||||
|
||||
public static Journal journal(
|
||||
final String name,
|
||||
final String issnPrinted,
|
||||
final String issnOnline,
|
||||
final String issnLinking,
|
||||
final String ep,
|
||||
final String iss,
|
||||
final String sp,
|
||||
final String vol,
|
||||
final String edition,
|
||||
final String conferenceplace,
|
||||
final String conferencedate,
|
||||
final DataInfo dataInfo) {
|
||||
|
||||
if (StringUtils.isNotBlank(name) || hasIssn(issnPrinted, issnOnline, issnLinking)) {
|
||||
final Journal j = new Journal();
|
||||
j.setName(name);
|
||||
j.setIssnPrinted(issnPrinted);
|
||||
j.setIssnOnline(issnOnline);
|
||||
j.setIssnLinking(issnLinking);
|
||||
j.setEp(ep);
|
||||
j.setIss(iss);
|
||||
j.setSp(sp);
|
||||
j.setVol(vol);
|
||||
j.setEdition(edition);
|
||||
j.setConferenceplace(conferenceplace);
|
||||
j.setConferencedate(conferencedate);
|
||||
j.setDataInfo(dataInfo);
|
||||
return j;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean hasIssn(String issnPrinted, String issnOnline, String issnLinking) {
|
||||
return StringUtils.isNotBlank(issnPrinted)
|
||||
|| StringUtils.isNotBlank(issnOnline)
|
||||
|| StringUtils.isNotBlank(issnLinking);
|
||||
}
|
||||
|
||||
public static DataInfo dataInfo(
|
||||
final Boolean deletedbyinference,
|
||||
final String inferenceprovenance,
|
||||
final Boolean inferred,
|
||||
final Boolean invisible,
|
||||
final Qualifier provenanceaction,
|
||||
final String trust) {
|
||||
final DataInfo d = new DataInfo();
|
||||
d.setDeletedbyinference(deletedbyinference);
|
||||
d.setInferenceprovenance(inferenceprovenance);
|
||||
d.setInferred(inferred);
|
||||
d.setInvisible(invisible);
|
||||
d.setProvenanceaction(provenanceaction);
|
||||
d.setTrust(trust);
|
||||
return d;
|
||||
}
|
||||
|
||||
public static String createOpenaireId(
|
||||
final int prefix,
|
||||
final String originalId,
|
||||
final boolean to_md5) {
|
||||
if (StringUtils.isBlank(originalId)) {
|
||||
return null;
|
||||
} else if (to_md5) {
|
||||
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
|
||||
final String rest = StringUtils.substringAfter(originalId, "::");
|
||||
return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
|
||||
} else {
|
||||
return String.format("%s|%s", prefix, originalId);
|
||||
}
|
||||
}
|
||||
|
||||
public static String createOpenaireId(
|
||||
final String type,
|
||||
final String originalId,
|
||||
final boolean to_md5) {
|
||||
switch (type) {
|
||||
case "datasource":
|
||||
return createOpenaireId(10, originalId, to_md5);
|
||||
case "organization":
|
||||
return createOpenaireId(20, originalId, to_md5);
|
||||
case "person":
|
||||
return createOpenaireId(30, originalId, to_md5);
|
||||
case "project":
|
||||
return createOpenaireId(40, originalId, to_md5);
|
||||
default:
|
||||
return createOpenaireId(50, originalId, to_md5);
|
||||
}
|
||||
}
|
||||
|
||||
public static String asString(final Object o) {
|
||||
return o == null ? "" : o.toString();
|
||||
}
|
||||
|
||||
public static <T> Predicate<T> distinctByKey(
|
||||
final Function<? super T, ?> keyExtractor) {
|
||||
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
||||
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
||||
}
|
||||
|
||||
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
|
||||
return getBestAccessRights(instanceList);
|
||||
}
|
||||
|
||||
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
||||
if (instanceList != null) {
|
||||
final Optional<AccessRight> min = instanceList
|
||||
.stream()
|
||||
.map(i -> i.getAccessright())
|
||||
.min(new AccessRightComparator<>());
|
||||
|
||||
final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier();
|
||||
|
||||
if (StringUtils.isBlank(rights.getClassid())) {
|
||||
rights.setClassid(UNKNOWN);
|
||||
}
|
||||
if (StringUtils.isBlank(rights.getClassname())
|
||||
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
||||
rights.setClassname(NOT_AVAILABLE);
|
||||
}
|
||||
if (StringUtils.isBlank(rights.getSchemeid())) {
|
||||
rights.setSchemeid(DNET_ACCESS_MODES);
|
||||
}
|
||||
if (StringUtils.isBlank(rights.getSchemename())) {
|
||||
rights.setSchemename(DNET_ACCESS_MODES);
|
||||
}
|
||||
|
||||
return rights;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
|
||||
public class OafMapperUtilsTest {
|
||||
|
||||
private static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
|
||||
@Test
|
||||
public void testMergePubs() throws IOException {
|
||||
Publication p1 = read("publication_1.json", Publication.class);
|
||||
Publication p2 = read("publication_2.json", Publication.class);
|
||||
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||
Dataset d2 = read("dataset_2.json", Dataset.class);
|
||||
|
||||
assertEquals(p1.getCollectedfrom().size(), 1);
|
||||
assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID);
|
||||
assertEquals(d2.getCollectedfrom().size(), 1);
|
||||
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||
|
||||
assertTrue(
|
||||
OafMapperUtils
|
||||
.mergeResults(p1, d2)
|
||||
.getResulttype()
|
||||
.getClassid()
|
||||
.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
|
||||
|
||||
assertEquals(p2.getCollectedfrom().size(), 1);
|
||||
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||
assertEquals(d1.getCollectedfrom().size(), 1);
|
||||
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||
|
||||
assertTrue(
|
||||
OafMapperUtils
|
||||
.mergeResults(p2, d1)
|
||||
.getResulttype()
|
||||
.getClassid()
|
||||
.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
|
||||
}
|
||||
|
||||
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||
return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new));
|
||||
}
|
||||
|
||||
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
||||
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||
return OBJECT_MAPPER.readValue(json, clazz);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
|
|
@ -0,0 +1 @@
|
|||
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
|
|
@ -0,0 +1 @@
|
|||
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
|
|
@ -0,0 +1 @@
|
|||
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}
|
|
@ -592,17 +592,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
return res;
|
||||
}
|
||||
|
||||
private Journal prepareJournal(final ResultSet rs, final DataInfo info) throws SQLException {
|
||||
if (Objects.isNull(rs)) {
|
||||
return null;
|
||||
} else {
|
||||
|
||||
return journal(
|
||||
rs.getString("officialname"), rs.getString("issnPrinted"), rs.getString("issnOnline"),
|
||||
rs.getString("issnLinking"), info);
|
||||
}
|
||||
}
|
||||
|
||||
public List<Oaf> processOrgOrgMergeRels(final ResultSet rs) {
|
||||
try {
|
||||
final DataInfo info = prepareDataInfo(rs); // TODO
|
||||
|
|
Loading…
Reference in New Issue