Utilities to update the Scholexplorer actionset content to dhp-schemas:2.4.7, implemented in ScholexplorerPatcher and used in the Zeppelin notebook at https://iis-cdh5-test-gw.ocean.icm.edu.pl/zeppelin/#/notebook/2G6AB1GT7

This commit is contained in:
Claudio Atzori 2021-05-26 16:46:43 +02:00
parent 4f58418184
commit c312d64f87
6 changed files with 262 additions and 2 deletions

View File

@ -0,0 +1,39 @@
package eu.dnetlib.dhp.export;
import java.io.Serializable;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import eu.dnetlib.dhp.schema.oaf.Oaf;
/**
 * Serializable holder pairing an {@link Oaf} payload with the class it is an instance of.
 * <p>
 * JSON deserialization of this type is delegated to {@link TolerantAtomicActionDeserializer}.
 *
 * @param <T> the concrete {@link Oaf} type carried as payload
 */
@JsonDeserialize(using = TolerantAtomicActionDeserializer.class)
public class AtomicActionSandro<T extends Oaf> implements Serializable {

    /** Class of the carried payload. */
    private Class<T> clazz;

    /** The carried model object. */
    private T payload;

    /** Creates an empty holder; both fields are left {@code null}. */
    public AtomicActionSandro() {
    }

    /**
     * Creates a holder for the given payload.
     *
     * @param clazz   class of the payload
     * @param payload the model object to carry
     */
    public AtomicActionSandro(Class<T> clazz, T payload) {
        this.clazz = clazz;
        this.payload = payload;
    }

    /** @return the class of the payload */
    public Class<T> getClazz() {
        return clazz;
    }

    /** @param clazz the class of the payload */
    public void setClazz(Class<T> clazz) {
        this.clazz = clazz;
    }

    /** @return the carried model object */
    public T getPayload() {
        return payload;
    }

    /** @param payload the model object to carry */
    public void setPayload(T payload) {
        this.payload = payload;
    }
}

View File

@ -0,0 +1,125 @@
package eu.dnetlib.dhp.export;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import scala.collection.immutable.Map;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
/**
 * Static helpers that parse a JSON-serialized atomic action and patch its payload
 * ({@link Publication}, {@link Dataset} or {@link Relation}) in place.
 * <p>
 * Payloads of any other type are not handled and yield {@code null}.
 */
public class ScholexplorerPatcher {

    private static final ObjectMapper mapper = new ObjectMapper();

    /**
     * Parses and patches the given atomic action, returning its payload when it is a publication.
     *
     * @param input JSON representation of an atomic action
     * @return the patched publication, or {@code null} when the action does not carry a publication
     * @throws IllegalStateException when the input cannot be parsed or a result has no instance
     */
    public static Publication patchPublication(String input) {
        return patchedPayload(input, Publication.class);
    }

    /**
     * Parses and patches the given atomic action, returning its payload when it is a dataset.
     *
     * @param input JSON representation of an atomic action
     * @return the patched dataset, or {@code null} when the action does not carry a dataset
     * @throws IllegalStateException when the input cannot be parsed or a result has no instance
     */
    public static Dataset patchDataset(String input) {
        return patchedPayload(input, Dataset.class);
    }

    /**
     * Parses and patches the given atomic action, returning its payload when it is a relation.
     *
     * @param input JSON representation of an atomic action
     * @return the patched relation, or {@code null} when the action does not carry a relation
     * @throws IllegalStateException when the input cannot be parsed
     */
    public static Relation patchRelation(String input) {
        return patchedPayload(input, Relation.class);
    }

    /**
     * Parses the given JSON and applies the patch matching the payload type.
     *
     * @param input JSON representation of an atomic action
     * @return the atomic action wrapping the patched payload, or {@code null} when nothing could be
     *         parsed or the payload type is not one of Publication, Dataset, Relation
     * @throws IllegalStateException when the input cannot be parsed or a result has no instance
     */
    @SuppressWarnings("rawtypes")
    public static AtomicActionSandro patchAtomicAction(String input) {
        try {
            final AtomicActionSandro aa = parse(input);
            // a JSON 'null' literal parses to null: treat it like an unsupported payload
            if (aa == null || aa.getClazz() == null) {
                return null;
            }
            switch (aa.getClazz().getSimpleName()) {
                case "Publication":
                    return patchPublication((Publication) aa.getPayload());
                case "Dataset":
                    return patchDataset((Dataset) aa.getPayload());
                case "Relation":
                    return patchRelation((Relation) aa.getPayload());
                default:
                    return null;
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Deserializes the given JSON into an {@link AtomicActionSandro}.
     *
     * @param input JSON representation of an atomic action
     * @return the parsed atomic action
     * @throws IOException when the JSON cannot be read
     */
    @SuppressWarnings("rawtypes")
    public static AtomicActionSandro parse(String input) throws IOException {
        return mapper.readValue(input, AtomicActionSandro.class);
    }

    /**
     * Wraps the given object in an atomic action typed after the object's runtime class.
     *
     * @param oaf the object to wrap, must not be {@code null}
     * @param <T> the type of the object
     * @return the atomic action carrying {@code oaf}
     */
    @SuppressWarnings("unchecked")
    public static <T extends Oaf> AtomicActionSandro<T> asAtomicAction(T oaf) {
        return new AtomicActionSandro<T>((Class<T>) oaf.getClass(), oaf);
    }

    /** Patches the action and returns its payload only when the payload class has the expected simple name. */
    private static <T extends Oaf> T patchedPayload(String input, Class<T> type) {
        final AtomicActionSandro<?> aa = patchAtomicAction(input);
        if (aa == null || !type.getSimpleName().equals(aa.getClazz().getSimpleName())) {
            return null;
        }
        return type.cast(aa.getPayload());
    }

    /** Replaces the relation class and the collectedfrom entries with their mapped counterparts. */
    private static AtomicActionSandro<Relation> patchRelation(Relation relation) {
        if ("IsCitedBy".equals(relation.getRelClass())) {
            relation.setRelClass(ModelConstants.IS_CITED_BY);
        }
        if ("IsReviewedBy".equals(relation.getRelClass())) {
            relation.setRelClass(ModelConstants.IS_REVIEWED_BY);
        }
        final List<KeyValue> collectedFrom = relation.getCollectedfrom();
        if (collectedFrom != null) {
            // the mapping does not depend on the single entry: look it up once
            final Map<String, KeyValue> map = DLIToOAF.collectedFromMap();
            collectedFrom.forEach(cf -> {
                if (map.contains(cf.getKey())) {
                    final KeyValue oaCf = map.get(cf.getKey()).get();
                    cf.setKey(oaCf.getKey());
                    cf.setValue(oaCf.getValue());
                }
            });
        }
        return new AtomicActionSandro<>(Relation.class, relation);
    }

    /** Patches the dataset as a generic result and wraps it. */
    private static AtomicActionSandro<Dataset> patchDataset(Dataset dataset) {
        return new AtomicActionSandro<>(Dataset.class, patchEntity(dataset));
    }

    /** Patches the publication as a generic result and wraps it. */
    private static AtomicActionSandro<Publication> patchPublication(Publication publication) {
        return new AtomicActionSandro<>(Publication.class, patchEntity(publication));
    }

    /**
     * Patches a result in place: on every instance the pids are emptied, the result pids are set as
     * alternate identifiers and the access right is set to UNKNOWN; the alternate labels of the external
     * references are emptied; the identifier is replaced by the one produced by
     * {@link IdentifierFactory#createDOIBoostIdentifier}, keeping the previous one among the original ids.
     *
     * @throws IllegalStateException when the result has no instance list
     */
    private static <T extends Result> T patchEntity(T result) {
        final List<Instance> instance = Optional
            .ofNullable(result.getInstance())
            .orElseThrow(() -> new IllegalStateException("record with missing instance: " + result.getId()));

        final List<StructuredProperty> pid = result.getPid();
        instance.forEach(i -> {
            i.setPid(Lists.newArrayList());
            // copy, so that the instances and the result do not share the same mutable list
            i.setAlternateIdentifier(pid == null ? null : Lists.newArrayList(pid));
            i
                .setAccessright(
                    OafMapperUtils
                        .accessRight(
                            ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_ACCESS_MODES,
                            ModelConstants.DNET_ACCESS_MODES));
        });

        Optional
            .ofNullable(result.getExternalReference())
            .ifPresent(refs -> refs.forEach(e -> e.setAlternateLabel(Lists.newArrayList())));

        final String newId = IdentifierFactory.createDOIBoostIdentifier(result);
        if (newId != null && !newId.equals(result.getId())) {
            if (result.getOriginalId() == null) {
                result.setOriginalId(Lists.newArrayList());
            }
            result.getOriginalId().add(result.getId());
            result.setId(newId);
        }
        return result;
    }
}

View File

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.export;
import java.io.IOException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.*;
import eu.dnetlib.dhp.schema.action.AtomicActionDeserializer;
import eu.dnetlib.dhp.schema.oaf.Oaf;
/**
 * Deserializer producing {@link AtomicActionSandro} objects from a JSON object with a {@code clazz}
 * field (fully qualified class name) and a {@code payload} field (the serialized object).
 * <p>
 * The payload is read ignoring the JSON properties unknown to the target class.
 */
public class TolerantAtomicActionDeserializer extends AtomicActionDeserializer {

    /** Shared payload mapper: configured once and only used for reading afterwards. */
    private static final ObjectMapper PAYLOAD_MAPPER = new ObjectMapper()
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    public TolerantAtomicActionDeserializer() {
    }

    /**
     * Reads the {@code clazz} and {@code payload} fields and deserializes the payload as an instance
     * of the declared class.
     *
     * @param jp   parser positioned on the atomic action JSON
     * @param ctxt deserialization context (unused)
     * @return the atomic action carrying the declared class and the deserialized payload
     * @throws IOException when a field is missing, the class cannot be loaded, the class is not an
     *                     {@link Oaf} subtype, or the payload cannot be read
     */
    @Override
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public AtomicActionSandro deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException {
        final JsonNode node = (JsonNode) jp.getCodec().readTree(jp);
        final JsonNode classNode = node.get("clazz");
        final JsonNode payload = node.get("payload");
        if (classNode == null || payload == null) {
            throw new IOException("atomic action must define both the 'clazz' and 'payload' fields");
        }
        final String classTag = classNode.asText();
        try {
            final Class<?> clazz = Class.forName(classTag);
            // reject unrelated classes before handing them to the mapper
            if (!Oaf.class.isAssignableFrom(clazz)) {
                throw new IOException("class " + classTag + " is not a subtype of " + Oaf.class.getName());
            }
            final Oaf oaf = (Oaf) PAYLOAD_MAPPER.readValue(payload.toString(), clazz);
            return new AtomicActionSandro(clazz, oaf);
        } catch (ClassNotFoundException e) {
            throw new IOException(e);
        }
    }
}

View File

@ -1,8 +1,9 @@
package eu.dnetlib.dhp.export
package eu.dnetlib.dhp.`export`
import eu.dnetlib.dhp.`export`.DLIToOAF
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import eu.dnetlib.dhp.provision.scholix.Scholix
import eu.dnetlib.dhp.provision.scholix.summary.ScholixSummary
import eu.dnetlib.dhp.schema.oaf.Relation

View File

@ -0,0 +1,58 @@
package eu.dnetlib.dhp.export;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.*;
/**
 * Checks the records produced by {@link ScholexplorerPatcher#patchAtomicAction(String)} on the
 * {@code scholexplorer_actions.json} test resource (one JSON atomic action per line).
 */
public class ScholexplorerDumpUpdateTest {

    private static final String ACTIONS_RESOURCE = "scholexplorer_actions.json";

    /**
     * Every patched result must expose, on each instance, no pid, a non-empty list of alternate
     * identifiers taken from the result pids, and the UNKNOWN access right.
     */
    @Test
    void testPatchScholexplorer() throws Exception {
        final List<String> json = loadActions();
        // guard against a vacuous run over an empty resource
        Assertions.assertFalse(json.isEmpty());

        json.stream().map(ScholexplorerPatcher::patchAtomicAction).filter(Objects::nonNull).forEach(aa -> {
            Oaf o = aa.getPayload();
            if (o instanceof Result) {
                final Result r = (Result) o;
                System.out.println(r.getId());

                // the same pid value may occur more than once: keep the first occurrence
                Map<String, StructuredProperty> pids = r
                    .getPid()
                    .stream()
                    .collect(
                        Collectors
                            .toMap(
                                StructuredProperty::getValue,
                                v -> v,
                                (first, second) -> first));

                r.getInstance().forEach(i -> {
                    Assertions.assertTrue(i.getPid().isEmpty());
                    Assertions.assertFalse(i.getAlternateIdentifier().isEmpty());
                    Assertions.assertEquals("UNKNOWN", i.getAccessright().getClassid());
                    // the instance pids are empty by now: the result pids must back the alternate identifiers
                    Assertions
                        .assertTrue(
                            pids
                                .keySet()
                                .containsAll(
                                    i
                                        .getAlternateIdentifier()
                                        .stream()
                                        .map(StructuredProperty::getValue)
                                        .collect(Collectors.toSet())));
                });
            }
        });
    }

    /** Reads the test resource line by line as UTF-8, closing the stream afterwards. */
    private List<String> loadActions() throws Exception {
        try (InputStream is = getClass().getResourceAsStream(ACTIONS_RESOURCE)) {
            Assertions.assertNotNull(is, "missing test resource: " + ACTIONS_RESOURCE);
            return IOUtils.readLines(is, StandardCharsets.UTF_8);
        }
    }
}