forked from D-Net/dnet-hadoop
Merge branch 'graph_cleaning'
This commit is contained in:
commit
4bcad1c9c3
|
@ -14,6 +14,7 @@ public class ModelConstants {
|
|||
public static final String DNET_DATA_CITE_RESOURCE = "dnet:dataCite_resource";
|
||||
public static final String DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
|
||||
public static final String DNET_COUNTRY_TYPE = "dnet:countries";
|
||||
public static final String DNET_REVIEW_LEVELS = "dnet:review_levels";
|
||||
|
||||
public static final String SYSIMPORT_CROSSWALK_REPOSITORY = "sysimport:crosswalk:repository";
|
||||
public static final String SYSIMPORT_CROSSWALK_ENTITYREGISTRY = "sysimport:crosswalk:entityregistry";
|
||||
|
|
|
@ -31,7 +31,7 @@ public class Instance implements Serializable {
|
|||
// typed results
|
||||
private Field<String> processingchargecurrency;
|
||||
|
||||
private Field<String> refereed; // peer-review status
|
||||
private Qualifier refereed; // peer-review status
|
||||
|
||||
public Field<String> getLicense() {
|
||||
return license;
|
||||
|
@ -113,11 +113,11 @@ public class Instance implements Serializable {
|
|||
this.processingchargecurrency = processingchargecurrency;
|
||||
}
|
||||
|
||||
public Field<String> getRefereed() {
|
||||
public Qualifier getRefereed() {
|
||||
return refereed;
|
||||
}
|
||||
|
||||
public void setRefereed(Field<String> refereed) {
|
||||
public void setRefereed(Qualifier refereed) {
|
||||
this.refereed = refereed;
|
||||
}
|
||||
|
||||
|
|
|
@ -254,28 +254,25 @@ public class Result extends OafEntity implements Serializable {
|
|||
final StructuredProperty p = baseMainTitle;
|
||||
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
||||
}
|
||||
//
|
||||
//
|
||||
// title.remove(baseMainTitle);
|
||||
}
|
||||
|
||||
StructuredProperty newMainTitle = null;
|
||||
if (r.getTitle() != null) {
|
||||
newMainTitle = getMainTitle(r.getTitle());
|
||||
if (newMainTitle != null) {
|
||||
if (newMainTitle != null && title != null) {
|
||||
final StructuredProperty p = newMainTitle;
|
||||
title = title.stream().filter(t -> t != p).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
// r.getTitle().remove(newMainTitle);
|
||||
}
|
||||
|
||||
if (newMainTitle != null && compareTrust(this, r) < 0)
|
||||
if (newMainTitle != null && compareTrust(this, r) < 0) {
|
||||
baseMainTitle = newMainTitle;
|
||||
}
|
||||
|
||||
title = mergeLists(title, r.getTitle());
|
||||
if (title != null && baseMainTitle != null)
|
||||
if (title != null && baseMainTitle != null) {
|
||||
title.add(baseMainTitle);
|
||||
}
|
||||
|
||||
relevantdate = mergeLists(relevantdate, r.getRelevantdate());
|
||||
|
||||
|
|
|
@ -96,12 +96,21 @@ public class ProtoConverter implements Serializable {
|
|||
.stream()
|
||||
.distinct()
|
||||
.collect(Collectors.toCollection(ArrayList::new)) : null);
|
||||
i.setRefereed(mapStringField(ri.getRefereed()));
|
||||
i.setRefereed(mapRefereed(ri.getRefereed()));
|
||||
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
||||
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
||||
return i;
|
||||
}
|
||||
|
||||
private static Qualifier mapRefereed(FieldTypeProtos.StringField refereed) {
|
||||
Qualifier q = new Qualifier();
|
||||
q.setClassid(refereed.getValue());
|
||||
q.setSchemename(refereed.getValue());
|
||||
q.setSchemeid("dnet:review_levels");
|
||||
q.setSchemename("dnet:review_levels");
|
||||
return q;
|
||||
}
|
||||
|
||||
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
|
||||
ResultProtos.Result r = oaf.getEntity().getResult();
|
||||
if (r.getExternalReferenceCount() > 0) {
|
||||
|
|
|
@ -8,6 +8,7 @@ import java.io.File;
|
|||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
@ -19,6 +20,7 @@ import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
|||
import eu.dnetlib.message.Message;
|
||||
import eu.dnetlib.message.MessageManager;
|
||||
|
||||
@Disabled
|
||||
public class DnetCollectorWorkerApplicationTests {
|
||||
|
||||
private final ArgumentApplicationParser argumentParser = mock(ArgumentApplicationParser.class);
|
||||
|
|
|
@ -166,8 +166,10 @@ case object Crossref2Oaf {
|
|||
|
||||
val has_review = (json \ "relation" \"has-review" \ "id")
|
||||
|
||||
if(has_review != JNothing)
|
||||
instance.setRefereed(asField("peerReviewed"))
|
||||
if(has_review != JNothing) {
|
||||
instance.setRefereed(
|
||||
createQualifier("0001", "peerReviewed", "dnet:review_levels", "dnet:review_levels"))
|
||||
}
|
||||
|
||||
|
||||
instance.setAccessright(getRestrictedQualifier())
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.beans.BeanInfo;
|
||||
import java.beans.IntrospectionException;
|
||||
import java.beans.Introspector;
|
||||
import java.beans.PropertyDescriptor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import scala.Predef;
|
||||
|
||||
public class CleanGraphProperties {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CleanGraphProperties.class);
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
CleanGraphProperties.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/input_clean_graph_parameters.json"));
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
String inputPath = parser.get("inputPath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
String isLookupUrl = parser.get("isLookupUrl");
|
||||
log.info("isLookupUrl: {}", isLookupUrl);
|
||||
|
||||
String graphTableClassName = parser.get("graphTableClassName");
|
||||
log.info("graphTableClassName: {}", graphTableClassName);
|
||||
|
||||
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
||||
|
||||
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
removeOutputDir(spark, outputPath);
|
||||
fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
|
||||
});
|
||||
}
|
||||
|
||||
private static <T extends Oaf> void fixGraphTable(
|
||||
SparkSession spark,
|
||||
VocabularyGroup vocs,
|
||||
String inputPath,
|
||||
Class<T> clazz,
|
||||
String outputPath) {
|
||||
|
||||
CleaningRule<T> rule = new CleaningRule<>(vocs);
|
||||
|
||||
readTableFromPath(spark, inputPath, clazz)
|
||||
.map(rule, Encoders.bean(clazz))
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.parquet(outputPath);
|
||||
}
|
||||
|
||||
private static <T extends Oaf> Dataset<T> readTableFromPath(
|
||||
SparkSession spark, String inputEntityPath, Class<T> clazz) {
|
||||
|
||||
log.info("Reading Graph table from: {}", inputEntityPath);
|
||||
return spark
|
||||
.read()
|
||||
.textFile(inputEntityPath)
|
||||
.map(
|
||||
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
|
||||
Encoders.bean(clazz));
|
||||
}
|
||||
|
||||
private static void removeOutputDir(SparkSession spark, String path) {
|
||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
|
||||
|
||||
private VocabularyGroup vocabularies;
|
||||
|
||||
private Map<Class, Function<Object, Object>> mapping = Maps.newHashMap();
|
||||
|
||||
|
||||
public CleaningRule(VocabularyGroup vocabularies) {
|
||||
this.vocabularies = vocabularies;
|
||||
|
||||
mapping.put(Qualifier.class, o -> patchQualifier(o));
|
||||
mapping.put(StructuredProperty.class, o -> patchSp(o));
|
||||
mapping.put(Field.class, o -> patchStringField(o));
|
||||
}
|
||||
|
||||
@Override
|
||||
public T call(T value) throws Exception {
|
||||
|
||||
OafNavigator.apply(value, mapping);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
private Object patchQualifier(Object o) {
|
||||
Qualifier q = (Qualifier) o;
|
||||
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
||||
return vocabularies.lookup(q.getSchemeid(), q.getClassid());
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
private Object patchSp(Object o) {
|
||||
StructuredProperty sp = (StructuredProperty) o;
|
||||
if (StringUtils.isBlank(sp.getValue())) {
|
||||
return null;
|
||||
}
|
||||
return o;
|
||||
}
|
||||
|
||||
private Object patchStringField(Object o) {
|
||||
Field f = (Field) o;
|
||||
try {
|
||||
if (StringUtils.isBlank((String) f.getValue())) {
|
||||
return null;
|
||||
}
|
||||
} catch (ClassCastException e) {
|
||||
// ignored on purpose
|
||||
}
|
||||
|
||||
return o;
|
||||
}
|
||||
|
||||
public VocabularyGroup getVocabularies() {
|
||||
return vocabularies;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Lists;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.beans.BeanInfo;
|
||||
import java.beans.IntrospectionException;
|
||||
import java.beans.Introspector;
|
||||
import java.beans.PropertyDescriptor;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class OafNavigator {
|
||||
|
||||
public static <E extends Oaf> E apply(E oaf, Map<Class, Function<Object, Object>> mapping) {
|
||||
reflect(oaf, mapping);
|
||||
return oaf;
|
||||
}
|
||||
|
||||
public static void reflect(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||
visit(o, mapping);
|
||||
}
|
||||
|
||||
public static void visit(final Object thingy, Map<Class, Function<Object, Object>> mapping) {
|
||||
|
||||
try {
|
||||
final Class<?> clazz = thingy.getClass();
|
||||
|
||||
if (!isPrimitive(thingy) && clazz.getPackage().equals(Oaf.class.getPackage())) {
|
||||
|
||||
final BeanInfo beanInfo = Introspector.getBeanInfo(clazz);
|
||||
|
||||
for (final PropertyDescriptor descriptor : beanInfo.getPropertyDescriptors()) {
|
||||
try {
|
||||
final Object value = descriptor.getReadMethod().invoke(thingy);
|
||||
|
||||
if (value != null && !isPrimitive(value)) {
|
||||
|
||||
System.out.println("VISITING " + descriptor.getName() + " " + descriptor.getPropertyType());
|
||||
|
||||
if (Iterable.class.isAssignableFrom(descriptor.getPropertyType())) {
|
||||
for(Object vi : (Iterable) value) {
|
||||
|
||||
visit(vi, mapping);
|
||||
}
|
||||
} else {
|
||||
|
||||
if (mapping.keySet().contains(value.getClass())) {
|
||||
final Object newValue = mapping.get(value.getClass()).apply(value);
|
||||
System.out.println("PATCHING " + descriptor.getName()+ " " + descriptor.getPropertyType());
|
||||
System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value));
|
||||
System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue));
|
||||
descriptor.getWriteMethod().invoke(newValue);
|
||||
}
|
||||
|
||||
visit(value, mapping);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (final IllegalArgumentException e) {
|
||||
// handle this please
|
||||
} catch (final IllegalAccessException e) {
|
||||
// and this also
|
||||
} catch (final InvocationTargetException e) {
|
||||
// and this, too
|
||||
} catch (JsonProcessingException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (final IntrospectionException e) {
|
||||
// do something sensible here
|
||||
}
|
||||
}
|
||||
|
||||
private static ObjectMapper getObjectMapper() {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper;
|
||||
}
|
||||
|
||||
private static void navigate(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||
if (Objects.isNull(o) || isPrimitive(o)) {
|
||||
return;
|
||||
} else {
|
||||
try {
|
||||
for (Field field : getAllFields(o.getClass())) {
|
||||
System.out.println(field.getName());
|
||||
field.setAccessible(true);
|
||||
Object value = field.get(o);
|
||||
|
||||
if (Objects.nonNull(value)) {
|
||||
final Class<?> fieldType = field.getType();
|
||||
if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) {
|
||||
Object[] fs = (Object[]) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
} if (Iterable.class.isAssignableFrom(fieldType)) {
|
||||
Iterable fs = (Iterable) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
} else {
|
||||
if (mapping.keySet().contains(value.getClass())) {
|
||||
System.out.println("PATCHING " + field.getName());
|
||||
field.set(o, mapping.get(value.getClass()).apply(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IllegalAccessException | IllegalArgumentException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isPrimitive(Object o) {
|
||||
return o.getClass().isPrimitive()
|
||||
|| o instanceof Class
|
||||
|| o instanceof Integer
|
||||
|| o instanceof Double
|
||||
|| o instanceof Float
|
||||
|| o instanceof Long
|
||||
|| o instanceof Boolean
|
||||
|| o instanceof String
|
||||
|| o instanceof Byte;
|
||||
}
|
||||
|
||||
private static List<Field> getAllFields(Class<?> clazz) {
|
||||
return getAllFields(new LinkedList<>(), clazz);
|
||||
}
|
||||
|
||||
private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
|
||||
fields.addAll(Arrays.asList(clazz.getDeclaredFields()));
|
||||
|
||||
final Class<?> superclass = clazz.getSuperclass();
|
||||
if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) {
|
||||
getAllFields(fields, superclass);
|
||||
}
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class OafNavigator2 {
|
||||
|
||||
public static <E extends Oaf> E apply(E oaf, Map<Class, Function<Object, Object>> mapping) {
|
||||
navigate(oaf, mapping);
|
||||
return oaf;
|
||||
}
|
||||
|
||||
private static void navigate(Object o, Map<Class, Function<Object, Object>> mapping) {
|
||||
if (Objects.isNull(o) || isPrimitive(o)) {
|
||||
return;
|
||||
} else {
|
||||
try {
|
||||
for (Field field : getAllFields(o.getClass())) {
|
||||
System.out.println("VISITING " + field.getName() + " in " + o.getClass());
|
||||
field.setAccessible(true);
|
||||
Object value = field.get(o);
|
||||
|
||||
if (Objects.nonNull(value)) {
|
||||
final Class<?> fieldType = field.getType();
|
||||
if ((fieldType.isArray() && !fieldType.getComponentType().isPrimitive())) {
|
||||
Object[] fs = (Object[]) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
} if (Iterable.class.isAssignableFrom(fieldType)) {
|
||||
Iterable fs = (Iterable) value;
|
||||
for (Object fi : fs) {
|
||||
navigate(fi, mapping);
|
||||
}
|
||||
} else {
|
||||
final Function<Object, Object> cleaningFn = mapping.get(value.getClass());
|
||||
if (Objects.nonNull(cleaningFn)) {
|
||||
final Object newValue = cleaningFn.apply(value);
|
||||
if (!Objects.equals(value, newValue)) {
|
||||
System.out.println("PATCHING " + field.getName()+ " " + value.getClass());
|
||||
System.out.println("OLD VALUE " + getObjectMapper().writeValueAsString(value));
|
||||
System.out.println("NEW VALUE " + getObjectMapper().writeValueAsString(newValue));
|
||||
field.set(o, newValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IllegalAccessException | IllegalArgumentException | JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static ObjectMapper getObjectMapper() {
|
||||
final ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper;
|
||||
}
|
||||
|
||||
private static boolean isPrimitive(Object o) {
|
||||
return o.getClass().isPrimitive()
|
||||
|| o instanceof Class
|
||||
|| o instanceof Integer
|
||||
|| o instanceof Double
|
||||
|| o instanceof Float
|
||||
|| o instanceof Long
|
||||
|| o instanceof Boolean
|
||||
|| o instanceof String
|
||||
|| o instanceof Byte;
|
||||
}
|
||||
|
||||
private static List<Field> getAllFields(Class<?> clazz) {
|
||||
return getAllFields(new LinkedList<>(), clazz);
|
||||
}
|
||||
|
||||
private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
|
||||
fields.addAll(Arrays.asList(clazz.getDeclaredFields()));
|
||||
|
||||
final Class<?> superclass = clazz.getSuperclass();
|
||||
if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) {
|
||||
getAllFields(fields, superclass);
|
||||
}
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
}
|
|
@ -39,6 +39,8 @@ import eu.dnetlib.dhp.schema.oaf.Project;
|
|||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class GenerateEntitiesApplication {
|
||||
|
@ -71,7 +73,8 @@ public class GenerateEntitiesApplication {
|
|||
final String isLookupUrl = parser.get("isLookupUrl");
|
||||
log.info("isLookupUrl: {}", isLookupUrl);
|
||||
|
||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
||||
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||
|
||||
final SparkConf conf = new SparkConf();
|
||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||
|
@ -139,9 +142,11 @@ public class GenerateEntitiesApplication {
|
|||
switch (type.toLowerCase()) {
|
||||
case "oaf-store-claim":
|
||||
case "oaf-store-cleaned":
|
||||
case "oaf-store-claim":
|
||||
return new OafToOafMapper(vocs, false).processMdRecord(s);
|
||||
case "odf-store-claim":
|
||||
case "odf-store-cleaned":
|
||||
case "odf-store-claim":
|
||||
return new OdfToOafMapper(vocs, false).processMdRecord(s);
|
||||
case "oaf-store-intersection":
|
||||
return new OafToOafMapper(vocs, true).processMdRecord(s);
|
||||
|
|
|
@ -71,6 +71,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
|||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
|
||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||
|
||||
|
@ -151,7 +152,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
super(hdfsPath);
|
||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||
this.lastUpdateTimestamp = new Date().getTime();
|
||||
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
||||
this.vocs = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||
}
|
||||
|
||||
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
||||
|
|
|
@ -133,7 +133,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
instance
|
||||
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
|
||||
instance
|
||||
.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||
instance
|
||||
|
|
|
@ -4,19 +4,7 @@ package eu.dnetlib.dhp.oa.graph.raw;
|
|||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.createOpenaireId;
|
||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
|
||||
import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENTED_BY;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_SUPPLEMENT_TO;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.PART;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.SUPPLEMENT;
|
||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
@ -129,7 +117,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
|||
instance
|
||||
.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
|
||||
instance.setLicense(field(doc.valueOf("//oaf:license"), info));
|
||||
instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
|
||||
instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
|
||||
instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
|
||||
instance
|
||||
.setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));
|
||||
|
|
|
@ -60,6 +60,10 @@ public class OafMapperUtils {
|
|||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
||||
}
|
||||
|
||||
public static Qualifier qualifier(
|
||||
final String classid,
|
||||
final String classname,
|
||||
|
|
|
@ -4,14 +4,29 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
|
|||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
|
||||
public class Vocabulary implements Serializable {
|
||||
|
||||
private final String id;
|
||||
private final String name;
|
||||
|
||||
/**
|
||||
* Code to Term mappings for this Vocabulary.
|
||||
*/
|
||||
private final Map<String, VocabularyTerm> terms = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Synonym to Code mappings for this Vocabulary.
|
||||
*/
|
||||
private final Map<String, String> synonyms = Maps.newHashMap();
|
||||
|
||||
public Vocabulary(final String id, final String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
|
@ -30,7 +45,7 @@ public class Vocabulary implements Serializable {
|
|||
}
|
||||
|
||||
public VocabularyTerm getTerm(final String id) {
|
||||
return terms.get(id.toLowerCase());
|
||||
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
|
||||
}
|
||||
|
||||
protected void addTerm(final String id, final String name) {
|
||||
|
@ -40,4 +55,32 @@ public class Vocabulary implements Serializable {
|
|||
protected boolean termExists(final String id) {
|
||||
return terms.containsKey(id.toLowerCase());
|
||||
}
|
||||
|
||||
protected void addSynonym(final String syn, final String termCode) {
|
||||
synonyms.put(syn, termCode.toLowerCase());
|
||||
}
|
||||
|
||||
public VocabularyTerm getTermBySynonym(final String syn) {
|
||||
return getTerm(synonyms.get(syn));
|
||||
}
|
||||
|
||||
public Qualifier getTermAsQualifier(final String termId) {
|
||||
if (StringUtils.isBlank(termId)) {
|
||||
return OafMapperUtils.unknown(getId(), getName());
|
||||
} else if (termExists(termId)) {
|
||||
final VocabularyTerm t = getTerm(termId);
|
||||
return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName());
|
||||
} else {
|
||||
return OafMapperUtils.qualifier(termId, termId, getId(), getName());
|
||||
}
|
||||
}
|
||||
|
||||
public Qualifier getSynonymAsQualifier(final String syn) {
|
||||
return Optional
|
||||
.ofNullable(getTermBySynonym(syn))
|
||||
.map(term -> getTermAsQualifier(term.getId()))
|
||||
.orElse(null);
|
||||
// .orElse(OafMapperUtils.unknown(getId(), getName()));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,33 +1,39 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
public class VocabularyGroup implements Serializable {
|
||||
|
||||
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
|
||||
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||
public static final String VOCABULARIES_XQUERY = "for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
|
||||
+
|
||||
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||
"for $term in ($x//TERM)\n" +
|
||||
"return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)";
|
||||
|
||||
final String xquery = IOUtils
|
||||
.toString(
|
||||
GenerateEntitiesApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
|
||||
public static final String VOCABULARY_SYNONYMS_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')\n"
|
||||
+
|
||||
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||
"for $term in ($x//TERM)\n" +
|
||||
"for $syn in ($term//SYNONYM/@term)\n" +
|
||||
"return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)\n";
|
||||
|
||||
public static VocabularyGroup loadVocsFromIS(ISLookUpService isLookUpService) throws ISLookUpException {
|
||||
|
||||
final VocabularyGroup vocs = new VocabularyGroup();
|
||||
|
||||
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
|
||||
for (final String s : isLookUpService.quickSearchProfile(VOCABULARIES_XQUERY)) {
|
||||
final String[] arr = s.split("@=@");
|
||||
if (arr.length == 4) {
|
||||
final String vocId = arr[0].trim();
|
||||
|
@ -40,6 +46,19 @@ public class VocabularyGroup implements Serializable {
|
|||
}
|
||||
|
||||
vocs.addTerm(vocId, termId, termName);
|
||||
vocs.addSynonyms(vocId, termId, termId);
|
||||
}
|
||||
}
|
||||
|
||||
for (final String s : isLookUpService.quickSearchProfile(VOCABULARY_SYNONYMS_XQUERY)) {
|
||||
final String[] arr = s.split("@=@");
|
||||
if (arr.length == 3) {
|
||||
final String vocId = arr[0].trim();
|
||||
final String termId = arr[1].trim();
|
||||
final String syn = arr[2].trim();
|
||||
|
||||
vocs.addSynonyms(vocId, termId, syn);
|
||||
vocs.addSynonyms(vocId, termId, termId);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,16 +85,37 @@ public class VocabularyGroup implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
||||
if (StringUtils.isBlank(id)) {
|
||||
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId);
|
||||
} else if (termExists(vocId, id)) {
|
||||
final Vocabulary v = vocs.get(vocId.toLowerCase());
|
||||
final VocabularyTerm t = v.getTerm(id);
|
||||
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());
|
||||
} else {
|
||||
return OafMapperUtils.qualifier(id, id, vocId, vocId);
|
||||
public Set<String> getTerms(String vocId) {
|
||||
if (!vocabularyExists(vocId)) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
return vocs
|
||||
.get(vocId.toLowerCase())
|
||||
.getTerms()
|
||||
.values()
|
||||
.stream()
|
||||
.map(t -> t.getId())
|
||||
.collect(Collectors.toCollection(HashSet::new));
|
||||
}
|
||||
|
||||
public Qualifier lookup(String vocId, String id) {
|
||||
return Optional
|
||||
.ofNullable(getSynonymAsQualifier(vocId, id))
|
||||
.orElse(getTermAsQualifier(vocId, id));
|
||||
}
|
||||
|
||||
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
||||
if (vocabularyExists(vocId)) {
|
||||
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
|
||||
}
|
||||
return OafMapperUtils.qualifier(id, id, "", "");
|
||||
}
|
||||
|
||||
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
|
||||
if (StringUtils.isBlank(vocId)) {
|
||||
return OafMapperUtils.unknown("", "");
|
||||
}
|
||||
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
||||
}
|
||||
|
||||
public boolean termExists(final String vocId, final String id) {
|
||||
|
@ -86,4 +126,16 @@ public class VocabularyGroup implements Serializable {
|
|||
return vocs.containsKey(vocId.toLowerCase());
|
||||
}
|
||||
|
||||
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
||||
String id = Optional
|
||||
.ofNullable(vocId)
|
||||
.map(s -> s.toLowerCase())
|
||||
.orElseThrow(
|
||||
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
|
||||
Optional
|
||||
.ofNullable(vocs.get(id))
|
||||
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
||||
.addSynonym(syn, termId);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
<configuration>
|
||||
<property>
|
||||
<name>jobTracker</name>
|
||||
<value>yarnRM</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>nameNode</name>
|
||||
<value>hdfs://nameservice1</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.use.system.libpath</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozie.action.sharelib.for.spark</name>
|
||||
<value>spark2</value>
|
||||
</property>
|
||||
</configuration>
|
|
@ -0,0 +1,289 @@
|
|||
<workflow-app name="clean graph" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>graphInputPath</name>
|
||||
<description>the input path to read graph content</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>graphOutputPath</name>
|
||||
<description>the target path to store cleaned graph</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>isLookupUrl</name>
|
||||
<description>the address of the lookUp service</description>
|
||||
</property>
|
||||
|
||||
<!-- NOTE(review): the spark actions in this workflow reference ${sparkDriverMemoryForJoining},
     ${sparkExecutorMemoryForJoining}, ${sparkExecutorCoresForJoining} and ${sparkNetworkTimeout},
     none of which is declared in this parameters section, while sparkDriverMemory,
     sparkExecutorMemory and sparkExecutorCores are declared here but not referenced by any action.
     Confirm which set of names is the intended one. -->
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorMemory</name>
|
||||
<description>memory for individual executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkExecutorCores</name>
|
||||
<description>number of cores used by single executor</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>oozieActionShareLibForSpark2</name>
|
||||
<description>oozie action sharelib for spark 2.*</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2ExtraListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorAppListener</value>
|
||||
<description>spark 2.* extra listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2SqlQueryExecutionListeners</name>
|
||||
<value>com.cloudera.spark.lineage.NavigatorQueryListener</value>
|
||||
<description>spark 2.* sql query execution listeners classname</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2YarnHistoryServerAddress</name>
|
||||
<description>spark 2.* yarn history server address</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>spark2EventLogDir</name>
|
||||
<description>spark 2.* event log dir location</description>
|
||||
</property>
|
||||
</parameters>
|
||||
|
||||
<start to="fork_clean_graph"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<fork name="fork_clean_graph">
|
||||
<path start="clean_publication"/>
|
||||
<path start="clean_dataset"/>
|
||||
<path start="clean_otherresearchproduct"/>
|
||||
<path start="clean_software"/>
|
||||
<path start="clean_datasource"/>
|
||||
<path start="clean_organization"/>
|
||||
<path start="clean_project"/>
|
||||
<path start="clean_relation"/>
|
||||
</fork>
|
||||
|
||||
<action name="clean_publication">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean publications</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/publication</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/publication</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_dataset">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean datasets</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/dataset</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/dataset</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_otherresearchproduct">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean otherresearchproducts</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/otherresearchproduct</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/otherresearchproduct</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_software">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean softwares</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/software</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/software</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_datasource">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean datasources</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/datasource</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/datasource</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_organization">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean organizations</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/organization</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/organization</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_project">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean projects</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/project</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/project</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="clean_relation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Clean relations</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-cores=${sparkExecutorCoresForJoining}
|
||||
--executor-memory=${sparkExecutorMemoryForJoining}
|
||||
--driver-memory=${sparkDriverMemoryForJoining}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
--conf spark.network.timeout=${sparkNetworkTimeout}
|
||||
</spark-opts>
|
||||
<arg>--inputPath</arg><arg>${graphInputPath}/relation</arg>
|
||||
<arg>--outputPath</arg><arg>${graphOutputPath}/relation</arg>
|
||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
</spark>
|
||||
<ok to="wait_clean"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<join name="wait_clean" to="End"/>
|
||||
|
||||
<end name="End"/>
|
||||
</workflow-app>
|
|
@ -0,0 +1,32 @@
|
|||
[
|
||||
{
|
||||
"paramName": "issm",
|
||||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "when true will stop SparkSession after job execution",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "in",
|
||||
"paramLongName": "inputPath",
|
||||
"paramDescription": "the path to the graph data dump to read",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "out",
|
||||
"paramLongName": "outputPath",
|
||||
"paramDescription": "the path to store the output graph",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "isu",
|
||||
"paramLongName": "isLookupUrl",
|
||||
"paramDescription": "url to the ISLookup Service",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "class",
|
||||
"paramLongName": "graphTableClassName",
|
||||
"paramDescription": "class name modelling the graph table",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
|
@ -0,0 +1,6 @@
|
|||
for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')
|
||||
let $vocid := $x//VOCABULARY_NAME/@code
|
||||
let $vocname := $x//VOCABULARY_NAME/text()
|
||||
for $term in ($x//TERM)
|
||||
for $syn in ($term//SYNONYM/@term)
|
||||
return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)
|
|
@ -0,0 +1,112 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.clean;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.junit.platform.commons.util.StringUtils;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyTerm;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
public class CleaningRuleTest {
|
||||
|
||||
public static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
|
||||
@Mock
|
||||
private ISLookUpService isLookUpService;
|
||||
|
||||
private VocabularyGroup vocabularies;
|
||||
|
||||
private CleaningRule<Publication> cleaningRule;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws ISLookUpException, IOException {
|
||||
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||
lenient()
|
||||
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||
.thenReturn(synonyms());
|
||||
|
||||
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||
cleaningRule = new CleaningRule(vocabularies);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCleaning() throws Exception {
|
||||
|
||||
assertNotNull(cleaningRule.getVocabularies());
|
||||
|
||||
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json"));
|
||||
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||
|
||||
Publication p_out = cleaningRule.call(p_in);
|
||||
|
||||
assertNotNull(p_out);
|
||||
|
||||
assertEquals("eng", p_out.getLanguage().getClassid());
|
||||
assertEquals("English", p_out.getLanguage().getClassname());
|
||||
|
||||
assertEquals("0018", p_out.getInstance().get(0).getInstancetype().getClassid());
|
||||
assertEquals("Annotation", p_out.getInstance().get(0).getInstancetype().getClassname());
|
||||
|
||||
assertEquals("CLOSED", p_out.getInstance().get(0).getAccessright().getClassid());
|
||||
assertEquals("Closed Access", p_out.getInstance().get(0).getAccessright().getClassname());
|
||||
|
||||
Set<String> pidTerms = vocabularies.getTerms("dnet:pid_types");
|
||||
assertTrue(
|
||||
p_out
|
||||
.getPid()
|
||||
.stream()
|
||||
.map(p -> p.getQualifier())
|
||||
.allMatch(q -> pidTerms.contains(q.getClassid())));
|
||||
|
||||
// TODO add more assertions to verity the cleaned values
|
||||
System.out.println(MAPPER.writeValueAsString(p_out));
|
||||
|
||||
assertTrue(
|
||||
p_out
|
||||
.getPid()
|
||||
.stream()
|
||||
.allMatch(sp -> StringUtils.isNotBlank(sp.getValue())));
|
||||
}
|
||||
|
||||
private Stream<Qualifier> getAuthorPidTypes(Publication pub) {
|
||||
return pub
|
||||
.getAuthor()
|
||||
.stream()
|
||||
.map(a -> a.getPid())
|
||||
.flatMap(p -> p.stream())
|
||||
.map(s -> s.getQualifier());
|
||||
}
|
||||
|
||||
private List<String> vocs() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
||||
}
|
||||
|
||||
private List<String> synonyms() throws IOException {
|
||||
return IOUtils
|
||||
.readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,757 @@
|
|||
{
|
||||
"author": [
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Brien, Tom",
|
||||
"name": "Tom",
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "ORCID12",
|
||||
"classname": "ORCID12",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "0000-0001-9613-6639"
|
||||
}
|
||||
],
|
||||
"rank": 1,
|
||||
"surname": "Brien"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Ade, Peter",
|
||||
"name": "Peter",
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "xyz",
|
||||
"classname": "XYZ",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "qwerty"
|
||||
}
|
||||
],
|
||||
"rank": 2,
|
||||
"surname": "Ade"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Barry, Peter S.",
|
||||
"name": "Peter S.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 3,
|
||||
"surname": "Barry"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Dunscombe, Chris J.",
|
||||
"name": "Chris J.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 4,
|
||||
"surname": "Dunscombe"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Leadley, David R.",
|
||||
"name": "David R.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 5,
|
||||
"surname": "Leadley"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Morozov, Dmitry V.",
|
||||
"name": "Dmitry V.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 6,
|
||||
"surname": "Morozov"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Myronov, Maksym",
|
||||
"name": "Maksym",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 7,
|
||||
"surname": "Myronov"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Parker, Evan",
|
||||
"name": "Evan",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 8,
|
||||
"surname": "Parker"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Prest, Martin J.",
|
||||
"name": "Martin J.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 9,
|
||||
"surname": "Prest"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Prunnila, Mika",
|
||||
"name": "Mika",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 10,
|
||||
"surname": "Prunnila"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Sudiwala, Rashmi V.",
|
||||
"name": "Rashmi V.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 11,
|
||||
"surname": "Sudiwala"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Whall, Terry E.",
|
||||
"name": "Terry E.",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 12,
|
||||
"surname": "Whall"
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": "Mauskopf",
|
||||
"name": "",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 13,
|
||||
"surname": ""
|
||||
},
|
||||
{
|
||||
"affiliation": [
|
||||
],
|
||||
"fullname": " P. D. ",
|
||||
"name": "",
|
||||
"pid": [
|
||||
],
|
||||
"rank": 14,
|
||||
"surname": ""
|
||||
}
|
||||
],
|
||||
"bestaccessright": {
|
||||
"classid": "CLOSED",
|
||||
"classname": "Closed Access",
|
||||
"schemeid": "dnet:access_modes",
|
||||
"schemename": "dnet:access_modes"
|
||||
},
|
||||
"collectedfrom": [
|
||||
{
|
||||
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||
"value": "VIRTA"
|
||||
}
|
||||
],
|
||||
"context": [
|
||||
],
|
||||
"contributor": [
|
||||
],
|
||||
"country": [
|
||||
],
|
||||
"coverage": [
|
||||
],
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"dateofacceptance": {
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"value": "2016-01-01"
|
||||
},
|
||||
"dateofcollection": "",
|
||||
"dateoftransformation": "2020-04-22T12:34:08.009Z",
|
||||
"description": [
|
||||
],
|
||||
"externalReference": [
|
||||
],
|
||||
"extraInfo": [
|
||||
],
|
||||
"format": [
|
||||
],
|
||||
"fulltext": [
|
||||
],
|
||||
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
||||
"instance": [
|
||||
{
|
||||
"accessright": {
|
||||
"classid": "CLOSED",
|
||||
"classname": "CLOSED",
|
||||
"schemeid": "dnet:access_modes",
|
||||
"schemename": "dnet:access_modes"
|
||||
},
|
||||
"collectedfrom": {
|
||||
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||
"value": "VIRTA"
|
||||
},
|
||||
"dateofacceptance": {
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"value": "2016-01-01"
|
||||
},
|
||||
"distributionlocation": "",
|
||||
"hostedby": {
|
||||
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||
"value": "VIRTA"
|
||||
},
|
||||
"instancetype": {
|
||||
"classid": "Comentario",
|
||||
"classname": "Comentario",
|
||||
"schemeid": "dnet:publication_resource",
|
||||
"schemename": "dnet:publication_resource"
|
||||
},
|
||||
"url": [
|
||||
"http://juuli.fi/Record/0275158616",
|
||||
"http://dx.doi.org/10.1007/s109090161569x"
|
||||
]
|
||||
}
|
||||
],
|
||||
"journal": {
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"edition": "",
|
||||
"ep": " 7",
|
||||
"iss": "9 March",
|
||||
"issnLinking": "",
|
||||
"issnOnline": "",
|
||||
"issnPrinted": "0022-2291",
|
||||
"name": "Journal of Low Temperature Physics - Early Acces",
|
||||
"sp": "1 ",
|
||||
"vol": ""
|
||||
},
|
||||
"language": {
|
||||
"classid": "en",
|
||||
"classname": "en",
|
||||
"schemeid": "dnet:languages",
|
||||
"schemename": "dnet:languages"
|
||||
},
|
||||
"lastupdatetimestamp": 1591283286319,
|
||||
"oaiprovenance": {
|
||||
"originDescription": {
|
||||
"altered": true,
|
||||
"baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif",
|
||||
"datestamp": "2019-07-30",
|
||||
"harvestDate": "2020-04-22T11:04:38.685Z",
|
||||
"identifier": "oai:virta-jtp.csc.fi:Publications/0275158616",
|
||||
"metadataNamespace": ""
|
||||
}
|
||||
},
|
||||
"originalId": [
|
||||
"CSC_________::2250a70c903c6ac6e4c01438259e9375"
|
||||
],
|
||||
"pid": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1007/s109090161569x"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": "10.1007/s109090161569x"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "doi",
|
||||
"classname": "doi",
|
||||
"schemeid": "dnet:pid_types",
|
||||
"schemename": "dnet:pid_types"
|
||||
},
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"relevantdate": [
|
||||
],
|
||||
"resourcetype": {
|
||||
"classid": "0001",
|
||||
"classname": "0001",
|
||||
"schemeid": "dnet:dataCite_resource",
|
||||
"schemename": "dnet:dataCite_resource"
|
||||
},
|
||||
"resulttype": {
|
||||
"classid": "publication",
|
||||
"classname": "publication",
|
||||
"schemeid": "dnet:result_typologies",
|
||||
"schemename": "dnet:result_typologies"
|
||||
},
|
||||
"source": [
|
||||
],
|
||||
"subject": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "ta213"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "infrared detectors"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "lens antennas"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "silicon"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "slot antennas"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "strained silicon"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "cold electron bolometers"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "doped silicon"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "measure noise"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "noise equivalent power"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "optical characterisation"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "optical response"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "photon noise"
|
||||
},
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "",
|
||||
"classname": "",
|
||||
"schemeid": "",
|
||||
"schemename": ""
|
||||
},
|
||||
"value": "silicon absorbers"
|
||||
}
|
||||
],
|
||||
"title": [
|
||||
{
|
||||
"dataInfo": {
|
||||
"deletedbyinference": false,
|
||||
"inferenceprovenance": "",
|
||||
"inferred": false,
|
||||
"invisible": false,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:datasetarchive",
|
||||
"classname": "sysimport:crosswalk:datasetarchive",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
},
|
||||
"trust": "0.9"
|
||||
},
|
||||
"qualifier": {
|
||||
"classid": "main title",
|
||||
"classname": "main title",
|
||||
"schemeid": "dnet:dataCite_title",
|
||||
"schemename": "dnet:dataCite_title"
|
||||
},
|
||||
"value": "Optical response of strained- and unstrained-silicon cold-electron bolometers"
|
||||
}
|
||||
]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1168,10 +1168,10 @@ public class XmlRecordFactory implements Serializable {
|
|||
.asXmlElement(
|
||||
"distributionlocation", instance.getDistributionlocation()));
|
||||
}
|
||||
if (instance.getRefereed() != null && isNotBlank(instance.getRefereed().getValue())) {
|
||||
if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) {
|
||||
fields
|
||||
.add(
|
||||
XmlSerializationUtils.asXmlElement("refereed", instance.getRefereed().getValue()));
|
||||
XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed()));
|
||||
}
|
||||
if (instance.getProcessingchargeamount() != null
|
||||
&& isNotBlank(instance.getProcessingchargeamount().getValue())) {
|
||||
|
|
Loading…
Reference in New Issue