Merge branch 'graph_cleaning'
This commit is contained in:
commit
4bcad1c9c3
|
@ -14,6 +14,7 @@ public class ModelConstants {
|
||||||
public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
|
public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
|
||||||
public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
|
public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
|
||||||
public static final String DNET_COUNTRY_TYPE = "dnet:countries";
|
public static final String DNET_COUNTRY_TYPE = "dnet:countries";
|
||||||
|
public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
|
||||||
|
|
||||||
public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
|
public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
|
||||||
public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
|
public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class Instance implements Serializable {
|
||||||
// typed results
|
// typed results
|
||||||
private Field<String> processingchargecurrency;
|
private Field<String> processingchargecurrency;
|
||||||
|
|
||||||
private Field<String> refereed; // peer-review status
|
private Qualifier refereed; // peer-review status
|
||||||
|
|
||||||
public Field<String> getLicense() {
|
public Field<String> getLicense() {
|
||||||
return license;
|
return license;
|
||||||
|
@ -113,11 +113,11 @@ public class Instance implements Serializable {
|
||||||
this.processingchargecurrency = processingchargecurrency;
|
this.processingchargecurrency = processingchargecurrency;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Field<String> getRefereed() {
|
public Qualifier getRefereed() {
|
||||||
return refereed;
|
return refereed;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setRefereed(Field<String> refereed) {
|
public void setRefereed(Qualifier refereed) {
|
||||||
this.refereed = refereed;
|
this.refereed = refereed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -254,28 +254,25 @@ public class Result extends OafEntity implements Serializable {
|
||||||
final StructuredProperty p = baseMainTitle;
|
final StructuredProperty p = baseMainTitle;
|
||||||
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
//
|
|
||||||
//
|
|
||||||
// title.remove(baseMainTitle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StructuredProperty newMainTitle = null;
|
StructuredProperty newMainTitle = null;
|
||||||
if (r.getTitle() != null) {
|
if (r.getTitle() != null) {
|
||||||
newMainTitle = getMainTitle(r.getTitle());
|
newMainTitle = getMainTitle(r.getTitle());
|
||||||
if (newMainTitle != null) {
|
if (newMainTitle != null && title != null) {
|
||||||
final StructuredProperty p = newMainTitle;
|
final StructuredProperty p = newMainTitle;
|
||||||
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
// r.getTitle().remove(newMainTitle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newMainTitle != null && compareTrust(this, r) < 0)
|
if (newMainTitle != null && compareTrust(this, r) < 0) {
|
||||||
baseMainTitle = newMainTitle;
|
baseMainTitle = newMainTitle;
|
||||||
|
}
|
||||||
|
|
||||||
title = mergeLists(title, r.getTitle());
|
title = mergeLists(title, r.getTitle());
|
||||||
if (title != null && baseMainTitle != null)
|
if (title != null && baseMainTitle != null) {
|
||||||
title.add(baseMainTitle);
|
title.add(baseMainTitle);
|
||||||
|
}
|
||||||
|
|
||||||
relevantdate = mergeLists(relevantdate, r.getRelevantdate());
|
relevantdate = mergeLists(relevantdate, r.getRelevantdate());
|
||||||
|
|
||||||
|
|
|
@ -96,12 +96,21 @@ public class ProtoConverter implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.distinct()
|
.distinct()
|
||||||
.collect(Collectors.toCollection(ArrayList::new)) : null);
|
.collect(Collectors.toCollection(ArrayList::new)) : null);
|
||||||
i.setRefereed(mapStringField(ri.getRefereed()));
|
i.setRefereed(mapRefereed(ri.getRefereed()));
|
||||||
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
||||||
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Qualifier mapRefereed(FieldTypeProtos.StringField refereed) {
|
||||||
|
Qualifier q = new Qualifier();
|
||||||
|
q.setClassid(refereed.getValue());
|
||||||
|
q.setSchemename(refereed.getValue());
|
||||||
|
q.setSchemeid("dnet:review_levels");
|
||||||
|
q.setSchemename("dnet:review_levels");
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
|
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
|
||||||
ResultProtos.Result r = oaf.getEntity().getResult();
|
ResultProtos.Result r = oaf.getEntity().getResult();
|
||||||
if (r.getExternalReferenceCount() > 0) {
|
if (r.getExternalReferenceCount() > 0) {
|
||||||
|
|
|
@ -8,6 +8,7 @@ import java.io.File;
|
||||||
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
@ -19,6 +20,7 @@ import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
||||||
import eu.dnetlib.message.Message;
|
import eu.dnetlib.message.Message;
|
||||||
import eu.dnetlib.message.MessageManager;
|
import eu.dnetlib.message.MessageManager;
|
||||||
|
|
||||||
|
@Disabled
|
||||||
public class DnetCollectorWorkerApplicationTests {
|
public class DnetCollectorWorkerApplicationTests {
|
||||||
|
|
||||||
private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
|
private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
|
||||||
|
|
|
@ -166,8 +166,10 @@ case object Crossref2Oaf {
|
||||||
|
|
||||||
val has_review = (json \ "relation" \"has-review" \ "id")
|
val has_review = (json \ "relation" \"has-review" \ "id")
|
||||||
|
|
||||||
if(has_review != JNothing)
|
if(has_review != JNothing) {
|
||||||
instance.setRefereed(asField("peerReviewed"))
|
instance.setRefereed(
|
||||||
|
createQualifier("0001", "peerReviewed", "dnet:review_levels", "dnet:review_levels"))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
instance.setAccessright(getRestrictedQualifier())
|
instance.setAccessright(getRestrictedQualifier())
|
||||||
|
|
|
@ -0,0 +1,119 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.beans.BeanInfo;
|
||||||
|
import java.beans.IntrospectionException;
|
||||||
|
import java.beans.Introspector;
|
||||||
|
import java.beans.PropertyDescriptor;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import scala.Predef;
|
||||||
|
|
||||||
|
public class CleanGraphProperties {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(CleanGraphProperties.class);
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
CleanGraphProperties.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/input_clean_graph_parameters.json"));
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
String inputPath = parser.get("inputPath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
|
String graphTableClassName = parser.get("graphTableClassName");
|
||||||
|
log.info("graphTableClassName: {}", graphTableClassName);
|
||||||
|
|
||||||
|
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
||||||
|
|
||||||
|
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
removeOutputDir(spark, outputPath);
|
||||||
|
fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends Oaf> void fixGraphTable(
|
||||||
|
SparkSession spark,
|
||||||
|
VocabularyGroup vocs,
|
||||||
|
String inputPath,
|
||||||
|
Class<T> clazz,
|
||||||
|
String outputPath) {
|
||||||
|
|
||||||
|
CleaningRule<T> rule = new CleaningRule<>(vocs);
|
||||||
|
|
||||||
|
readTableFromPath(spark, inputPath, clazz)
|
||||||
|
.map(rule, Encoders.bean(clazz))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.parquet(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends Oaf> Dataset<T> readTableFromPath(
|
||||||
|
SparkSession spark, String inputEntityPath, Class<T> clazz) {
|
||||||
|
|
||||||
|
log.info("Reading Graph table from: {}", inputEntityPath);
|
||||||
|
return spark
|
||||||
|
.read()
|
||||||
|
.textFile(inputEntityPath)
|
||||||
|
.map(
|
||||||
|
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
|
||||||
|
Encoders.bean(clazz));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void removeOutputDir(SparkSession spark, String path) {
|
||||||
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
|
||||||
|
|
||||||
|
private VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
private Map<Class, Function<Object, Object>> mapping = Maps.newHashMap();
|
||||||
|
|
||||||
|
|
||||||
|
public CleaningRule(VocabularyGroup vocabularies) {
|
||||||
|
this.vocabularies = vocabularies;
|
||||||
|
|
||||||
|
mapping.put(Qualifier.class, o -> patchQualifier(o));
|
||||||
|
mapping.put(StructuredProperty.class, o -> patchSp(o));
|
||||||
|
mapping.put(Field.class, o -> patchStringField(o));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T call(T value) throws Exception {
|
||||||
|
|
||||||
|
OafNavigator.apply(value, mapping);
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object patchQualifier(Object o) {
|
||||||
|
Qualifier q = (Qualifier) o;
|
||||||
|
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
||||||
|
return vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
||||||
|
}
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object patchSp(Object o) {
|
||||||
|
StructuredProperty sp = (StructuredProperty) o;
|
||||||
|
if (StringUtils.isBlank(sp.getValue())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Object patchStringField(Object o) {
|
||||||
|
Field f = (Field) o;
|
||||||
|
try {
|
||||||
|
if (StringUtils.isBlank((String) f.getValue())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} catch (ClassCastException e) {
|
||||||
|
// ignored on purpose
|
||||||
|
}
|
||||||
|
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
public VocabularyGroup getVocabularies() {
|
||||||
|
return vocabularies;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,151 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
|
import java.beans.BeanInfo;
|
||||||
|
import java.beans.IntrospectionException;
|
||||||
|
import java.beans.Introspector;
|
||||||
|
import java.beans.PropertyDescriptor;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public class OafNavigator {
|
||||||
|
|
||||||
|
public static <E extends Oaf> E apply(E oaf, Map<Class, Function<Object, Object>> mapping) {
|
||||||
|
reflect(oaf, mapping);
|
||||||
|
return oaf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void reflect(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||||
|
visit(o, mapping);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void visit(final Object thingy, Map<Class, Function<Object, Object>> mapping) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
final Class<?> clazz = thingy.getClass();
|
||||||
|
|
||||||
|
if (!isPrimitive(thingy) && clazz.getPackage().equals(Oaf.class.getPackage())) {
|
||||||
|
|
||||||
|
final BeanInfo beanInfo = Introspector.getBeanInfo(clazz);
|
||||||
|
|
||||||
|
for (final PropertyDescriptor descriptor : beanInfo.getPropertyDescriptors()) {
|
||||||
|
try {
|
||||||
|
final Object value = descriptor.getReadMethod().invoke(thingy);
|
||||||
|
|
||||||
|
if (value != null && !isPrimitive(value)) {
|
||||||
|
|
||||||
|
System.out.println("VISITING " + descriptor.getName() + " " + descriptor.getPropertyType());
|
||||||
|
|
||||||
|
if (Iterable.class.isAssignableFrom(descriptor.getPropertyType())) {
|
||||||
|
for(Object vi : (Iterable) value) {
|
||||||
|
|
||||||
|
visit(vi, mapping);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
if (mapping.keySet().contains(value.getClass())) {
|
||||||
|
final Object newValue = mapping.get(value.getClass()).apply(value);
|
||||||
|
System.out.println("PATCHING " + descriptor.getName()+ " " + descriptor.getPropertyType());
|
||||||
|
System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value));
|
||||||
|
System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue));
|
||||||
|
descriptor.getWriteMethod().invoke(newValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
visit(value, mapping);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (final IllegalArgumentException e) {
|
||||||
|
// handle this please
|
||||||
|
} catch (final IllegalAccessException e) {
|
||||||
|
// and this also
|
||||||
|
} catch (final InvocationTargetException e) {
|
||||||
|
// and this, too
|
||||||
|
} catch (JsonProcessingException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (final IntrospectionException e) {
|
||||||
|
// do something sensible here
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ObjectMapper getObjectMapper() {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
return mapper;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void navigate(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||||
|
if (Objects.isNull(o) || isPrimitive(o)) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
for (Field field : getAllFields(o.getClass())) {
|
||||||
|
System.out.println(field.getName());
|
||||||
|
field.setAccessible(true);
|
||||||
|
Object value = field.get(o);
|
||||||
|
|
||||||
|
if (Objects.nonNull(value)) {
|
||||||
|
final Class<?> fieldType = field.getType();
|
||||||
|
if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) {
|
||||||
|
Object[] fs = (Object[]) value;
|
||||||
|
for (Object fi : fs) {
|
||||||
|
navigate(fi, mapping);
|
||||||
|
}
|
||||||
|
} if (Iterable.class.isAssignableFrom(fieldType)) {
|
||||||
|
Iterable fs = (Iterable) value;
|
||||||
|
for (Object fi : fs) {
|
||||||
|
navigate(fi, mapping);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (mapping.keySet().contains(value.getClass())) {
|
||||||
|
System.out.println("PATCHING " + field.getName());
|
||||||
|
field.set(o, mapping.get(value.getClass()).apply(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IllegalAccessException | IllegalArgumentException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isPrimitive(Object o) {
|
||||||
|
return o.getClass().isPrimitive()
|
||||||
|
|| o instanceof Class
|
||||||
|
|| o instanceof Integer
|
||||||
|
|| o instanceof Double
|
||||||
|
|| o instanceof Float
|
||||||
|
|| o instanceof Long
|
||||||
|
|| o instanceof Boolean
|
||||||
|
|| o instanceof String
|
||||||
|
|| o instanceof Byte;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Field> getAllFields(Class<?> clazz) {
|
||||||
|
return getAllFields(new LinkedList<>(), clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
|
||||||
|
fields.addAll(Arrays.asList(clazz.getDeclaredFields()));
|
||||||
|
|
||||||
|
final Class<?> superclass = clazz.getSuperclass();
|
||||||
|
if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) {
|
||||||
|
getAllFields(fields, superclass);
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,94 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
|
public class OafNavigator2 {
|
||||||
|
|
||||||
|
public static <E extends Oaf> E apply(E oaf, Map<Class, Function<Object, Object>> mapping) {
|
||||||
|
navigate(oaf, mapping);
|
||||||
|
return oaf;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void navigate(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||||
|
if (Objects.isNull(o) || isPrimitive(o)) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
for (Field field : getAllFields(o.getClass())) {
|
||||||
|
System.out.println("VISITING " + field.getName() + " in " + o.getClass());
|
||||||
|
field.setAccessible(true);
|
||||||
|
Object value = field.get(o);
|
||||||
|
|
||||||
|
if (Objects.nonNull(value)) {
|
||||||
|
final Class<?> fieldType = field.getType();
|
||||||
|
if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) {
|
||||||
|
Object[] fs = (Object[]) value;
|
||||||
|
for (Object fi : fs) {
|
||||||
|
navigate(fi, mapping);
|
||||||
|
}
|
||||||
|
} if (Iterable.class.isAssignableFrom(fieldType)) {
|
||||||
|
Iterable fs = (Iterable) value;
|
||||||
|
for (Object fi : fs) {
|
||||||
|
navigate(fi, mapping);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
final Function<Object, Object> cleaningFn = mapping.get(value.getClass());
|
||||||
|
if (Objects.nonNull(cleaningFn)) {
|
||||||
|
final Object newValue = cleaningFn.apply(value);
|
||||||
|
if (!Objects.equals(value, newValue)) {
|
||||||
|
System.out.println("PATCHING " + field.getName()+ " " + value.getClass());
|
||||||
|
System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value));
|
||||||
|
System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue));
|
||||||
|
field.set(o, newValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IllegalAccessException | IllegalArgumentException | JsonProcessingException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ObjectMapper getObjectMapper() {
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
return mapper;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isPrimitive(Object o) {
|
||||||
|
return o.getClass().isPrimitive()
|
||||||
|
|| o instanceof Class
|
||||||
|
|| o instanceof Integer
|
||||||
|
|| o instanceof Double
|
||||||
|
|| o instanceof Float
|
||||||
|
|| o instanceof Long
|
||||||
|
|| o instanceof Boolean
|
||||||
|
|| o instanceof String
|
||||||
|
|| o instanceof Byte;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Field> getAllFields(Class<?> clazz) {
|
||||||
|
return getAllFields(new LinkedList<>(), clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
|
||||||
|
fields.addAll(Arrays.asList(clazz.getDeclaredFields()));
|
||||||
|
|
||||||
|
final Class<?> superclass = clazz.getSuperclass();
|
||||||
|
if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) {
|
||||||
|
getAllFields(fields, superclass);
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -39,6 +39,8 @@ import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class GenerateEntitiesApplication {
|
public class GenerateEntitiesApplication {
|
||||||
|
@ -71,7 +73,8 @@ public class GenerateEntitiesApplication {
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
|
@ -139,9 +142,11 @@ public class GenerateEntitiesApplication {
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
case "oaf-store-claim":
|
case "oaf-store-claim":
|
||||||
case "oaf-store-cleaned":
|
case "oaf-store-cleaned":
|
||||||
|
case "oaf-store-claim":
|
||||||
return new OafToOafMapper(vocs, false).processMdRecord(s);
|
return new OafToOafMapper(vocs, false).processMdRecord(s);
|
||||||
case "odf-store-claim":
|
case "odf-store-claim":
|
||||||
case "odf-store-cleaned":
|
case "odf-store-cleaned":
|
||||||
|
case "odf-store-claim":
|
||||||
return new OdfToOafMapper(vocs, false).processMdRecord(s);
|
return new OdfToOafMapper(vocs, false).processMdRecord(s);
|
||||||
case "oaf-store-intersection":
|
case "oaf-store-intersection":
|
||||||
return new OafToOafMapper(vocs, true).processMdRecord(s);
|
return new OafToOafMapper(vocs, true).processMdRecord(s);
|
||||||
|
|
|
@ -71,6 +71,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
|
||||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||||
|
|
||||||
|
@ -151,7 +152,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
super(hdfsPath);
|
super(hdfsPath);
|
||||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||||
this.lastUpdateTimestamp = new Date().getTime();
|
this.lastUpdateTimestamp = new Date().getTime();
|
||||||
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
this.vocs = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
||||||
|
|
|
@ -133,7 +133,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
instance
|
instance
|
||||||
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
|
||||||
instance
|
instance
|
||||||
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
instance
|
instance
|
||||||
|
|
|
@ -4,19 +4,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PART;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -129,7 +117,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
instance
|
instance
|
||||||
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
||||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
|
||||||
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||||
instance
|
instance
|
||||||
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||||
|
|
|
@ -60,6 +60,10 @@ public class OafMapperUtils {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||||
|
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
||||||
|
}
|
||||||
|
|
||||||
public static Qualifier qualifier(
|
public static Qualifier qualifier(
|
||||||
final String classid,
|
final String classid,
|
||||||
final String classname,
|
final String classname,
|
||||||
|
|
|
@ -4,14 +4,29 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
public class Vocabulary implements Serializable {
|
public class Vocabulary implements Serializable {
|
||||||
|
|
||||||
private final String id;
|
private final String id;
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Code to Term mappings for this Vocabulary.
|
||||||
|
*/
|
||||||
private final Map<String, VocabularyTerm> terms = new HashMap<>();
|
private final Map<String, VocabularyTerm> terms = new HashMap<>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Synonym to Code mappings for this Vocabulary.
|
||||||
|
*/
|
||||||
|
private final Map<String, String> synonyms = Maps.newHashMap();
|
||||||
|
|
||||||
public Vocabulary(final String id, final String name) {
|
public Vocabulary(final String id, final String name) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
@ -30,7 +45,7 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTerm(final String id) {
|
public VocabularyTerm getTerm(final String id) {
|
||||||
return terms.get(id.toLowerCase());
|
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void addTerm(final String id, final String name) {
|
protected void addTerm(final String id, final String name) {
|
||||||
|
@ -40,4 +55,32 @@ public class Vocabulary implements Serializable {
|
||||||
protected boolean termExists(final String id) {
|
protected boolean termExists(final String id) {
|
||||||
return terms.containsKey(id.toLowerCase());
|
return terms.containsKey(id.toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void addSynonym(final String syn, final String termCode) {
|
||||||
|
synonyms.put(syn, termCode.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
public VocabularyTerm getTermBySynonym(final String syn) {
|
||||||
|
return getTerm(synonyms.get(syn));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getTermAsQualifier(final String termId) {
|
||||||
|
if (StringUtils.isBlank(termId)) {
|
||||||
|
return OafMapperUtils.unknown(getId(), getName());
|
||||||
|
} else if (termExists(termId)) {
|
||||||
|
final VocabularyTerm t = getTerm(termId);
|
||||||
|
return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName());
|
||||||
|
} else {
|
||||||
|
return OafMapperUtils.qualifier(termId, termId, getId(), getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getSynonymAsQualifier(final String syn) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getTermBySynonym(syn))
|
||||||
|
.map(term -> getTermAsQualifier(term.getId()))
|
||||||
|
.orElse(null);
|
||||||
|
// .orElse(OafMapperUtils.unknown(getId(), getName()));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,33 +1,39 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.*;
|
||||||
import java.util.Map;
|
import java.util.function.Supplier;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class VocabularyGroup implements Serializable {
|
public class VocabularyGroup implements Serializable {
|
||||||
|
|
||||||
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
|
public static final String VOCABULARIES_XQUERY = "for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
|
||||||
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
+
|
||||||
|
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||||
|
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||||
|
"for $term in ($x//TERM)\n" +
|
||||||
|
"return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)";
|
||||||
|
|
||||||
final String xquery = IOUtils
|
public static final String VOCABULARY_SYNONYMS_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')\n"
|
||||||
.toString(
|
+
|
||||||
GenerateEntitiesApplication.class
|
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
|
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||||
|
"for $term in ($x//TERM)\n" +
|
||||||
|
"for $syn in ($term//SYNONYM/@term)\n" +
|
||||||
|
"return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)\n";
|
||||||
|
|
||||||
|
public static VocabularyGroup loadVocsFromIS(ISLookUpService isLookUpService) throws ISLookUpException {
|
||||||
|
|
||||||
final VocabularyGroup vocs = new VocabularyGroup();
|
final VocabularyGroup vocs = new VocabularyGroup();
|
||||||
|
|
||||||
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
|
for (final String s : isLookUpService.quickSearchProfile(VOCABULARIES_XQUERY)) {
|
||||||
final String[] arr = s.split("@=@");
|
final String[] arr = s.split("@=@");
|
||||||
if (arr.length == 4) {
|
if (arr.length == 4) {
|
||||||
final String vocId = arr[0].trim();
|
final String vocId = arr[0].trim();
|
||||||
|
@ -40,6 +46,19 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
vocs.addTerm(vocId, termId, termName);
|
vocs.addTerm(vocId, termId, termName);
|
||||||
|
vocs.addSynonyms(vocId, termId, termId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (final String s : isLookUpService.quickSearchProfile(VOCABULARY_SYNONYMS_XQUERY)) {
|
||||||
|
final String[] arr = s.split("@=@");
|
||||||
|
if (arr.length == 3) {
|
||||||
|
final String vocId = arr[0].trim();
|
||||||
|
final String termId = arr[1].trim();
|
||||||
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
|
vocs.addSynonyms(vocId, termId, termId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,16 +85,37 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
public Set<String> getTerms(String vocId) {
|
||||||
if (StringUtils.isBlank(id)) {
|
if (!vocabularyExists(vocId)) {
|
||||||
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId);
|
return new HashSet<>();
|
||||||
} else if (termExists(vocId, id)) {
|
|
||||||
final Vocabulary v = vocs.get(vocId.toLowerCase());
|
|
||||||
final VocabularyTerm t = v.getTerm(id);
|
|
||||||
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());
|
|
||||||
} else {
|
|
||||||
return OafMapperUtils.qualifier(id, id, vocId, vocId);
|
|
||||||
}
|
}
|
||||||
|
return vocs
|
||||||
|
.get(vocId.toLowerCase())
|
||||||
|
.getTerms()
|
||||||
|
.values()
|
||||||
|
.stream()
|
||||||
|
.map(t -> t.getId())
|
||||||
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier lookup(String vocId, String id) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getSynonymAsQualifier(vocId, id))
|
||||||
|
.orElse(getTermAsQualifier(vocId, id));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
||||||
|
if (vocabularyExists(vocId)) {
|
||||||
|
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
|
||||||
|
}
|
||||||
|
return OafMapperUtils.qualifier(id, id, "", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
|
||||||
|
if (StringUtils.isBlank(vocId)) {
|
||||||
|
return OafMapperUtils.unknown("", "");
|
||||||
|
}
|
||||||
|
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean termExists(final String vocId, final String id) {
|
public boolean termExists(final String vocId, final String id) {
|
||||||
|
@ -86,4 +126,16 @@ public class VocabularyGroup implements Serializable {
|
||||||
return vocs.containsKey(vocId.toLowerCase());
|
return vocs.containsKey(vocId.toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
||||||
|
String id = Optional
|
||||||
|
.ofNullable(vocId)
|
||||||
|
.map(s -> s.toLowerCase())
|
||||||
|
.orElseThrow(
|
||||||
|
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
|
||||||
|
Optional
|
||||||
|
.ofNullable(vocs.get(id))
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
||||||
|
.addSynonym(syn, termId);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>jobTracker</name>
|
||||||
|
<value>yarnRM</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>nameNode</name>
|
||||||
|
<value>hdfs://nameservice1</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.use.system.libpath</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozie.action.sharelib.for.spark</name>
|
||||||
|
<value>spark2</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
|
@ -0,0 +1,289 @@
|
||||||
|
<workflow-app name="clean graph" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
|
<parameters>
|
||||||
|
<property>
|
||||||
|
<name>graphInputPath</name>
|
||||||
|
<description>the input path to read graph content</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>graphOutputPath</name>
|
||||||
|
<description>the target path to store cleaned graph</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>isLookupUrl</name>
|
||||||
|
<description>the address of the lookUp service</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>sparkDriverMemory</name>
|
||||||
|
<description>memory for driver process</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorMemory</name>
|
||||||
|
<description>memory for individual executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>sparkExecutorCores</name>
|
||||||
|
<description>number of cores used by single executor</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>oozieActionShareLibForSpark2</name>
|
||||||
|
<description>oozie action sharelib for spark 2.*</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2ExtraListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||||
|
<description>spark 2.* extra listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2SqlQueryExecutionListeners</name>
|
||||||
|
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||||
|
<description>spark 2.* sql query execution listeners classname</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2YarnHistoryServerAddress</name>
|
||||||
|
<description>spark 2.* yarn history server address</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>spark2EventLogDir</name>
|
||||||
|
<description>spark 2.* event log dir location</description>
|
||||||
|
</property>
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<start to="fork_clean_graph"/>
|
||||||
|
|
||||||
|
<kill name="Kill">
|
||||||
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
|
</kill>
|
||||||
|
|
||||||
|
<fork name="fork_clean_graph">
|
||||||
|
<path start="clean_publication"/>
|
||||||
|
<path start="clean_dataset"/>
|
||||||
|
<path start="clean_otherresearchproduct"/>
|
||||||
|
<path start="clean_software"/>
|
||||||
|
<path start="clean_datasource"/>
|
||||||
|
<path start="clean_organization"/>
|
||||||
|
<path start="clean_project"/>
|
||||||
|
<path start="clean_relation"/>
|
||||||
|
</fork>
|
||||||
|
|
||||||
|
<action name="clean_publication">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean publications</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/publication</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_dataset">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean datasets</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/dataset</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_otherresearchproduct">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean otherresearchproducts</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/otherresearchproduct</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_software">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean softwares</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/software</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/software</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_datasource">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean datasources</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/datasource</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_organization">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean organizations</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/organization</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_project">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean projects</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/project</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="clean_relation">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>Clean relations</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||||
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--inputPath</arg><arg>${graphInputPath}/relation</arg>
|
||||||
|
<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
|
||||||
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="wait_clean"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<join name="wait_clean" to="End"/>
|
||||||
|
|
||||||
|
<end name="End"/>
|
||||||
|
</workflow-app>
|
|
@ -0,0 +1,32 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"paramName": "issm",
|
||||||
|
"paramLongName": "isSparkSessionManaged",
|
||||||
|
"paramDescription": "when true will stop SparkSession after job execution",
|
||||||
|
"paramRequired": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "in",
|
||||||
|
"paramLongName": "inputPath",
|
||||||
|
"paramDescription": "the path to the graph data dump to read",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "out",
|
||||||
|
"paramLongName": "outputPath",
|
||||||
|
"paramDescription": "the path to store the output graph",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "isu",
|
||||||
|
"paramLongName": "isLookupUrl",
|
||||||
|
"paramDescription": "url to the ISLookup Service",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "class",
|
||||||
|
"paramLongName": "graphTableClassName",
|
||||||
|
"paramDescription": "class name moelling the graph table",
|
||||||
|
"paramRequired": true
|
||||||
|
}
|
||||||
|
]
|
|
@ -0,0 +1,6 @@
|
||||||
|
for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')
|
||||||
|
let $vocid := $x//VOCABULARY_NAME/@code
|
||||||
|
let $vocname := $x//VOCABULARY_NAME/text()
|
||||||
|
for $term in ($x//TERM)
|
||||||
|
for $syn in ($term//SYNONYM/@term)
|
||||||
|
return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)
|
|
@ -0,0 +1,112 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.junit.platform.commons.util.StringUtils;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
public class CleaningRuleTest {
|
||||||
|
|
||||||
|
public static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
private ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
private VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
private CleaningRule<Publication> cleaningRule;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws ISLookUpException, IOException {
|
||||||
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
|
lenient()
|
||||||
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
|
.thenReturn(synonyms());
|
||||||
|
|
||||||
|
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
cleaningRule = new CleaningRule(vocabularies);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCleaning() throws Exception {
|
||||||
|
|
||||||
|
assertNotNull(cleaningRule.getVocabularies());
|
||||||
|
|
||||||
|
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json"));
|
||||||
|
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||||
|
|
||||||
|
Publication p_out = cleaningRule.call(p_in);
|
||||||
|
|
||||||
|
assertNotNull(p_out);
|
||||||
|
|
||||||
|
assertEquals("eng", p_out.getLanguage().getClassid());
|
||||||
|
assertEquals("English", p_out.getLanguage().getClassname());
|
||||||
|
|
||||||
|
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
|
||||||
|
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
|
||||||
|
|
||||||
|
assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid());
|
||||||
|
assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname());
|
||||||
|
|
||||||
|
Set<String> pidTerms = vocabularies.getTerms("dnet:pid_types");
|
||||||
|
assertTrue(
|
||||||
|
p_out
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.map(p -> p.getQualifier())
|
||||||
|
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
||||||
|
|
||||||
|
// TODO add more assertions to verify the cleaned values
|
||||||
|
System.out.println(MAPPER.writeValueAsString(p_out));
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
p_out
|
||||||
|
.getPid()
|
||||||
|
.stream()
|
||||||
|
.allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Stream<Qualifier> getAuthorPidTypes(Publication pub) {
|
||||||
|
return pub
|
||||||
|
.getAuthor()
|
||||||
|
.stream()
|
||||||
|
.map(a -> a.getPid())
|
||||||
|
.flatMap(p -> p.stream())
|
||||||
|
.map(s -> s.getQualifier());
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> vocs() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> synonyms() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,757 @@
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Brien, Tom",
|
||||||
|
"name": "Tom",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "ORCID12",
|
||||||
|
"classname": "ORCID12",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "0000-0001-9613-6639"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rank": 1,
|
||||||
|
"surname": "Brien"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Ade, Peter",
|
||||||
|
"name": "Peter",
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "xyz",
|
||||||
|
"classname": "XYZ",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "qwerty"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"rank": 2,
|
||||||
|
"surname": "Ade"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Barry, Peter S.",
|
||||||
|
"name": "Peter S.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 3,
|
||||||
|
"surname": "Barry"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Dunscombe, Chris J.",
|
||||||
|
"name": "Chris J.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 4,
|
||||||
|
"surname": "Dunscombe"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Leadley, David R.",
|
||||||
|
"name": "David R.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 5,
|
||||||
|
"surname": "Leadley"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Morozov, Dmitry V.",
|
||||||
|
"name": "Dmitry V.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 6,
|
||||||
|
"surname": "Morozov"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Myronov, Maksym",
|
||||||
|
"name": "Maksym",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 7,
|
||||||
|
"surname": "Myronov"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Parker, Evan",
|
||||||
|
"name": "Evan",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 8,
|
||||||
|
"surname": "Parker"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Prest, Martin J.",
|
||||||
|
"name": "Martin J.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 9,
|
||||||
|
"surname": "Prest"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Prunnila, Mika",
|
||||||
|
"name": "Mika",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 10,
|
||||||
|
"surname": "Prunnila"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Sudiwala, Rashmi V.",
|
||||||
|
"name": "Rashmi V.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 11,
|
||||||
|
"surname": "Sudiwala"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Whall, Terry E.",
|
||||||
|
"name": "Terry E.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 12,
|
||||||
|
"surname": "Whall"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Mauskopf",
|
||||||
|
"name": "",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 13,
|
||||||
|
"surname": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": " P. D. ",
|
||||||
|
"name": "",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 14,
|
||||||
|
"surname": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"bestaccessright": {
|
||||||
|
"classid": "CLOSED",
|
||||||
|
"classname": "Closed Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": [
|
||||||
|
{
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"context": [
|
||||||
|
],
|
||||||
|
"contributor": [
|
||||||
|
],
|
||||||
|
"country": [
|
||||||
|
],
|
||||||
|
"coverage": [
|
||||||
|
],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2016-01-01"
|
||||||
|
},
|
||||||
|
"dateofcollection": "",
|
||||||
|
"dateoftransformation": "2020-04-22T12:34:08.009Z",
|
||||||
|
"description": [
|
||||||
|
],
|
||||||
|
"externalReference": [
|
||||||
|
],
|
||||||
|
"extraInfo": [
|
||||||
|
],
|
||||||
|
"format": [
|
||||||
|
],
|
||||||
|
"fulltext": [
|
||||||
|
],
|
||||||
|
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"accessright": {
|
||||||
|
"classid": "CLOSED",
|
||||||
|
"classname": "CLOSED",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2016-01-01"
|
||||||
|
},
|
||||||
|
"distributionlocation": "",
|
||||||
|
"hostedby": {
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
},
|
||||||
|
"instancetype": {
|
||||||
|
"classid": "Comentario",
|
||||||
|
"classname": "Comentario",
|
||||||
|
"schemeid": "dnet:publication_resource",
|
||||||
|
"schemename": "dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"url": [
|
||||||
|
"http://juuli.fi/Record/0275158616",
|
||||||
|
"http://dx.doi.org/10.1007/s109090161569x"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"journal": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"edition": "",
|
||||||
|
"ep": " 7",
|
||||||
|
"iss": "9 March",
|
||||||
|
"issnLinking": "",
|
||||||
|
"issnOnline": "",
|
||||||
|
"issnPrinted": "0022-2291",
|
||||||
|
"name": "Journal of Low Temperature Physics - Early Acces",
|
||||||
|
"sp": "1 ",
|
||||||
|
"vol": ""
|
||||||
|
},
|
||||||
|
"language": {
|
||||||
|
"classid": "en",
|
||||||
|
"classname": "en",
|
||||||
|
"schemeid": "dnet:languages",
|
||||||
|
"schemename": "dnet:languages"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1591283286319,
|
||||||
|
"oaiprovenance": {
|
||||||
|
"originDescription": {
|
||||||
|
"altered": true,
|
||||||
|
"baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif",
|
||||||
|
"datestamp": "2019-07-30",
|
||||||
|
"harvestDate": "2020-04-22T11:04:38.685Z",
|
||||||
|
"identifier": "oai:virta-jtp.csc.fi:Publications/0275158616",
|
||||||
|
"metadataNamespace": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"originalId": [
|
||||||
|
"CSC_________::2250a70c903c6ac6e4c01438259e9375"
|
||||||
|
],
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"relevantdate": [
|
||||||
|
],
|
||||||
|
"resourcetype": {
|
||||||
|
"classid": "0001",
|
||||||
|
"classname": "0001",
|
||||||
|
"schemeid": "dnet:dataCite_resource",
|
||||||
|
"schemename": "dnet:dataCite_resource"
|
||||||
|
},
|
||||||
|
"resulttype": {
|
||||||
|
"classid": "publication",
|
||||||
|
"classname": "publication",
|
||||||
|
"schemeid": "dnet:result_typologies",
|
||||||
|
"schemename": "dnet:result_typologies"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
],
|
||||||
|
"subject": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "ta213"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "infrared detectors"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "lens antennas"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "silicon"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "slot antennas"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "strained silicon"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "cold electron bolometers"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "doped silicon"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "measure noise"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "noise equivalent power"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "optical characterisation"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "optical response"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "photon noise"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "silicon absorbers"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "main title",
|
||||||
|
"classname": "main title",
|
||||||
|
"schemeid": "dnet:dataCite_title",
|
||||||
|
"schemename": "dnet:dataCite_title"
|
||||||
|
},
|
||||||
|
"value": "Optical response of strained- and unstrained-silicon cold-electron bolometers"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1168,10 +1168,10 @@ public class XmlRecordFactory implements Serializable {
|
||||||
.asXmlElement(
|
.asXmlElement(
|
||||||
"distributionlocation", instance.getDistributionlocation()));
|
"distributionlocation", instance.getDistributionlocation()));
|
||||||
}
|
}
|
||||||
if (instance.getRefereed() != null && isNotBlank(instance.getRefereed().getValue())) {
|
if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) {
|
||||||
fields
|
fields
|
||||||
.add(
|
.add(
|
||||||
XmlSerializationUtils.asXmlElement("refereed", instance.getRefereed().getValue()));
|
XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed()));
|
||||||
}
|
}
|
||||||
if (instance.getProcessingchargeamount() != null
|
if (instance.getProcessingchargeamount() != null
|
||||||
&& isNotBlank(instance.getProcessingchargeamount().getValue())) {
|
&& isNotBlank(instance.getProcessingchargeamount().getValue())) {
|
||||||
|
|
Loading…
Reference in New Issue