forked from D-Net/dnet-hadoop
WIP: graph cleaner implementation
This commit is contained in:
parent
3d871c6651
commit
d9f33582c5
|
@ -0,0 +1,119 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
|
import java.beans.BeanInfo;
|
||||||
|
import java.beans.IntrospectionException;
|
||||||
|
import java.beans.Introspector;
|
||||||
|
import java.beans.PropertyDescriptor;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.spark.SparkConf;
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import scala.Predef;
|
||||||
|
|
||||||
|
public class CleanGraphProperties {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(CleanGraphProperties.class);
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
String jsonConfiguration = IOUtils
|
||||||
|
.toString(
|
||||||
|
CleanGraphProperties.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/oa/graph/input_clean_graph_parameters.json"));
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
Boolean isSparkSessionManaged = Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
|
||||||
|
String inputPath = parser.get("inputPath");
|
||||||
|
log.info("inputPath: {}", inputPath);
|
||||||
|
|
||||||
|
String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
|
String isLookupUrl = parser.get("isLookupUrl");
|
||||||
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
|
String graphTableClassName = parser.get("graphTableClassName");
|
||||||
|
log.info("graphTableClassName: {}", graphTableClassName);
|
||||||
|
|
||||||
|
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
||||||
|
|
||||||
|
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||||
|
|
||||||
|
SparkConf conf = new SparkConf();
|
||||||
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
spark -> {
|
||||||
|
removeOutputDir(spark, outputPath);
|
||||||
|
fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends Oaf> void fixGraphTable(
|
||||||
|
SparkSession spark,
|
||||||
|
VocabularyGroup vocs,
|
||||||
|
String inputPath,
|
||||||
|
Class<T> clazz,
|
||||||
|
String outputPath) {
|
||||||
|
|
||||||
|
CleaningRule<T> rule = new CleaningRule<>(vocs);
|
||||||
|
|
||||||
|
readTableFromPath(spark, inputPath, clazz)
|
||||||
|
.map(rule, Encoders.bean(clazz))
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.parquet(outputPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T extends Oaf> Dataset<T> readTableFromPath(
|
||||||
|
SparkSession spark, String inputEntityPath, Class<T> clazz) {
|
||||||
|
|
||||||
|
log.info("Reading Graph table from: {}", inputEntityPath);
|
||||||
|
return spark
|
||||||
|
.read()
|
||||||
|
.textFile(inputEntityPath)
|
||||||
|
.map(
|
||||||
|
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
|
||||||
|
Encoders.bean(clazz));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void removeOutputDir(SparkSession spark, String path) {
|
||||||
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import java.beans.BeanInfo;
|
||||||
|
import java.beans.IntrospectionException;
|
||||||
|
import java.beans.Introspector;
|
||||||
|
import java.beans.PropertyDescriptor;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
|
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {
|
||||||
|
|
||||||
|
private VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
public CleaningRule(VocabularyGroup vocabularies) {
|
||||||
|
this.vocabularies = vocabularies;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public T call(T value) throws Exception {
|
||||||
|
|
||||||
|
doClean(value);
|
||||||
|
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doClean(Object o) {
|
||||||
|
if (Objects.isNull(o)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (o instanceof Iterable) {
|
||||||
|
for (Object oi : (Iterable) o) {
|
||||||
|
doClean(oi);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
Class clazz = o.getClass();
|
||||||
|
|
||||||
|
if (clazz.isPrimitive()
|
||||||
|
|| o instanceof Integer
|
||||||
|
|| o instanceof Double
|
||||||
|
|| o instanceof Float
|
||||||
|
|| o instanceof Long
|
||||||
|
|| o instanceof Boolean
|
||||||
|
|| o instanceof String) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
for (Field field : getAllFields(new LinkedList<>(), clazz)) {
|
||||||
|
field.setAccessible(true);
|
||||||
|
Object value = field.get(o);
|
||||||
|
if (value instanceof Qualifier) {
|
||||||
|
Qualifier q = (Qualifier) value;
|
||||||
|
if (vocabularies.vocabularyExists(q.getSchemeid())) {
|
||||||
|
|
||||||
|
field.set(o, vocabularies.lookup(q.getSchemeid(), q.getClassid()));
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
doClean(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IllegalAccessException | IllegalArgumentException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
|
||||||
|
fields.addAll(Arrays.asList(clazz.getDeclaredFields()));
|
||||||
|
|
||||||
|
final Class<?> superclass = clazz.getSuperclass();
|
||||||
|
if (Objects.nonNull(superclass) && superclass.getPackage().equals(Oaf.class.getPackage())) {
|
||||||
|
getAllFields(fields, superclass);
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,210 +0,0 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.fix;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
|
||||||
import eu.dnetlib.dhp.schema.common.EntityType;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
|
||||||
import org.apache.spark.sql.Dataset;
|
|
||||||
import org.apache.spark.sql.Encoders;
|
|
||||||
import org.apache.spark.sql.SaveMode;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
public class FixGraphProperties {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(FixGraphProperties.class);
|
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
|
||||||
.toString(
|
|
||||||
FixGraphProperties.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/oa/graph/input_fix_graph_parameters.json"));
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
String inputPath = parser.get("inputPath");
|
|
||||||
log.info("inputPath: {}", inputPath);
|
|
||||||
|
|
||||||
String outputPath = parser.get("outputPath");
|
|
||||||
log.info("outputPath: {}", outputPath);
|
|
||||||
|
|
||||||
String isLookupUrl = parser.get("isLookupUrl");
|
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
|
||||||
|
|
||||||
String graphTableClassName = parser.get("graphTableClassName");
|
|
||||||
log.info("graphTableClassName: {}", graphTableClassName);
|
|
||||||
|
|
||||||
Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
|
|
||||||
|
|
||||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
runWithSparkSession(
|
|
||||||
conf,
|
|
||||||
isSparkSessionManaged,
|
|
||||||
spark -> {
|
|
||||||
removeOutputDir(spark, outputPath);
|
|
||||||
fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <T extends Oaf> void fixGraphTable(
|
|
||||||
SparkSession spark,
|
|
||||||
VocabularyGroup vocs,
|
|
||||||
String inputPath,
|
|
||||||
Class<T> clazz,
|
|
||||||
String outputPath) {
|
|
||||||
|
|
||||||
MapFunction<T, T> fixFn = getFixingFunction(vocs, clazz);
|
|
||||||
|
|
||||||
readTableFromPath(spark, inputPath, clazz)
|
|
||||||
.map(fixFn, Encoders.bean(clazz))
|
|
||||||
.write()
|
|
||||||
.mode(SaveMode.Overwrite)
|
|
||||||
.parquet(outputPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <T extends Oaf> MapFunction<T, T> getFixingFunction(VocabularyGroup vocs, Class<T> clazz) {
|
|
||||||
|
|
||||||
switch (clazz.getCanonicalName()) {
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Publication":
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Dataset":
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct":
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Software":
|
|
||||||
return (MapFunction<T, T>) value -> {
|
|
||||||
Result r = (Result) value;
|
|
||||||
|
|
||||||
if (r.getLanguage() != null) {
|
|
||||||
r.setLanguage(vocs.getTermAsQualifier("dnet:languages", "und"));
|
|
||||||
} else {
|
|
||||||
r.setLanguage(vocs.getTermAsQualifier("dnet:languages", r.getLanguage().getClassid()));
|
|
||||||
}
|
|
||||||
if (r.getCountry() != null) {
|
|
||||||
r.setCountry(
|
|
||||||
r.getCountry()
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.map(c -> {
|
|
||||||
Qualifier q = vocs.getTermAsQualifier("dnet:countries", c.getClassid());
|
|
||||||
Country cn = new Country();
|
|
||||||
cn.setDataInfo(c.getDataInfo());
|
|
||||||
cn.setClassid(q.getClassid());
|
|
||||||
cn.setClassname(cn.getClassname());
|
|
||||||
cn.setSchemeid("dnet:countries");
|
|
||||||
cn.setSchemename("dnet:countries");
|
|
||||||
return cn;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r.getSubject() != null) {
|
|
||||||
r.setSubject(
|
|
||||||
r.getSubject()
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.map(s -> {
|
|
||||||
if (s.getQualifier() == null || StringUtils.isBlank(s.getQualifier().getClassid())) {
|
|
||||||
s.setQualifier(vocs.getTermAsQualifier("dnet:subject_classification_typologies", "UNKNOWN"));
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList())
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (r.getPublisher() != null && StringUtils.isBlank(r.getPublisher().getValue())) {
|
|
||||||
r.setPublisher(null);
|
|
||||||
}
|
|
||||||
if (r.getBestaccessright() == null) {
|
|
||||||
r.setBestaccessright(vocs.getTermAsQualifier("dnet:access_modes", "UNKNOWN"));
|
|
||||||
}
|
|
||||||
if (r.getInstance() != null) {
|
|
||||||
for(Instance i : r.getInstance()) {
|
|
||||||
if (i.getAccessright() == null) {
|
|
||||||
i.setAccessright(vocs.getTermAsQualifier("dnet:access_modes", "UNKNOWN"));
|
|
||||||
}
|
|
||||||
if (i.getInstancetype() != null) {
|
|
||||||
i.setInstancetype(vocs.getTermAsQualifier("dnet:publication_resource", i.getInstancetype().getClassid()));
|
|
||||||
} else {
|
|
||||||
i.setInstancetype(vocs.getTermAsQualifier("dnet:publication_resource", "0000"));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return clazz.cast(r);
|
|
||||||
};
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Datasource":
|
|
||||||
return (MapFunction<T, T>) value -> {
|
|
||||||
return value;
|
|
||||||
};
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Organization":
|
|
||||||
return (MapFunction<T, T>) value -> {
|
|
||||||
Organization o = (Organization) value;
|
|
||||||
|
|
||||||
if (o.getCountry() == null) {
|
|
||||||
o.setCountry(vocs.getTermAsQualifier("dnet:countries", "UNKNOWN"));
|
|
||||||
} else {
|
|
||||||
o.setCountry(vocs.getTermAsQualifier("dnet:countries", o.getCountry().getClassid()));
|
|
||||||
}
|
|
||||||
|
|
||||||
return clazz.cast(o);
|
|
||||||
};
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Project":
|
|
||||||
return (MapFunction<T, T>) value -> {
|
|
||||||
return value;
|
|
||||||
};
|
|
||||||
case "eu.dnetlib.dhp.schema.oaf.Relation":
|
|
||||||
return (MapFunction<T, T>) value -> {
|
|
||||||
return value;
|
|
||||||
};
|
|
||||||
default:
|
|
||||||
throw new RuntimeException("unknown class: " + clazz.getCanonicalName());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static <T extends Oaf> Dataset<T> readTableFromPath(
|
|
||||||
SparkSession spark, String inputEntityPath, Class<T> clazz) {
|
|
||||||
|
|
||||||
log.info("Reading Graph table from: {}", inputEntityPath);
|
|
||||||
return spark
|
|
||||||
.read()
|
|
||||||
.textFile(inputEntityPath)
|
|
||||||
.map(
|
|
||||||
(MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
|
|
||||||
Encoders.bean(clazz));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark, String path) {
|
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -39,6 +39,8 @@ import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class GenerateEntitiesApplication {
|
public class GenerateEntitiesApplication {
|
||||||
|
@ -71,7 +73,8 @@ public class GenerateEntitiesApplication {
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
|
||||||
|
|
||||||
final SparkConf conf = new SparkConf();
|
final SparkConf conf = new SparkConf();
|
||||||
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
|
||||||
|
|
|
@ -71,6 +71,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
|
|
||||||
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
|
||||||
|
|
||||||
|
@ -151,7 +152,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
super(hdfsPath);
|
super(hdfsPath);
|
||||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||||
this.lastUpdateTimestamp = new Date().getTime();
|
this.lastUpdateTimestamp = new Date().getTime();
|
||||||
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);
|
this.vocs = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)
|
||||||
|
|
|
@ -60,6 +60,10 @@ public class OafMapperUtils {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||||
|
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
||||||
|
}
|
||||||
|
|
||||||
public static Qualifier qualifier(
|
public static Qualifier qualifier(
|
||||||
final String classid,
|
final String classid,
|
||||||
final String classname,
|
final String classname,
|
||||||
|
|
|
@ -4,14 +4,29 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
|
||||||
public class Vocabulary implements Serializable {
|
public class Vocabulary implements Serializable {
|
||||||
|
|
||||||
private final String id;
|
private final String id;
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Code to Term mappings for this Vocabulary.
|
||||||
|
*/
|
||||||
private final Map<String, VocabularyTerm> terms = new HashMap<>();
|
private final Map<String, VocabularyTerm> terms = new HashMap<>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Synonym to Code mappings for this Vocabulary.
|
||||||
|
*/
|
||||||
|
private final Map<String, String> synonyms = Maps.newHashMap();
|
||||||
|
|
||||||
public Vocabulary(final String id, final String name) {
|
public Vocabulary(final String id, final String name) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
@ -30,7 +45,7 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTerm(final String id) {
|
public VocabularyTerm getTerm(final String id) {
|
||||||
return terms.get(id.toLowerCase());
|
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void addTerm(final String id, final String name) {
|
protected void addTerm(final String id, final String name) {
|
||||||
|
@ -40,4 +55,32 @@ public class Vocabulary implements Serializable {
|
||||||
protected boolean termExists(final String id) {
|
protected boolean termExists(final String id) {
|
||||||
return terms.containsKey(id.toLowerCase());
|
return terms.containsKey(id.toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void addSynonym(final String syn, final String termCode) {
|
||||||
|
synonyms.put(syn, termCode.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
public VocabularyTerm getTermBySynonym(final String syn) {
|
||||||
|
return getTerm(synonyms.get(syn));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getTermAsQualifier(final String termId) {
|
||||||
|
if (StringUtils.isBlank(termId)) {
|
||||||
|
return OafMapperUtils.unknown(getId(), getName());
|
||||||
|
} else if (termExists(termId)) {
|
||||||
|
final VocabularyTerm t = getTerm(termId);
|
||||||
|
return OafMapperUtils.qualifier(t.getId(), t.getName(), getId(), getName());
|
||||||
|
} else {
|
||||||
|
return OafMapperUtils.qualifier(termId, termId, getId(), getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getSynonymAsQualifier(final String syn) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getTermBySynonym(syn))
|
||||||
|
.map(term -> getTermAsQualifier(term.getId()))
|
||||||
|
.orElse(null);
|
||||||
|
// .orElse(OafMapperUtils.unknown(getId(), getName()));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,33 +1,40 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.oa.graph.raw.common;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class VocabularyGroup implements Serializable {
|
public class VocabularyGroup implements Serializable {
|
||||||
|
|
||||||
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
|
public static final String VOCABULARIES_XQUERY = "for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
|
||||||
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
+
|
||||||
|
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||||
|
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||||
|
"for $term in ($x//TERM)\n" +
|
||||||
|
"return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)";
|
||||||
|
|
||||||
final String xquery = IOUtils
|
public static final String VOCABULARY_SYNONYMS_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')\n"
|
||||||
.toString(
|
+
|
||||||
GenerateEntitiesApplication.class
|
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));
|
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
|
||||||
|
"for $term in ($x//TERM)\n" +
|
||||||
|
"for $syn in ($term//SYNONYM/@term)\n" +
|
||||||
|
"return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)\n";
|
||||||
|
|
||||||
|
public static VocabularyGroup loadVocsFromIS(ISLookUpService isLookUpService) throws ISLookUpException {
|
||||||
|
|
||||||
final VocabularyGroup vocs = new VocabularyGroup();
|
final VocabularyGroup vocs = new VocabularyGroup();
|
||||||
|
|
||||||
for (final String s : isLookUpService.quickSearchProfile(xquery)) {
|
for (final String s : isLookUpService.quickSearchProfile(VOCABULARIES_XQUERY)) {
|
||||||
final String[] arr = s.split("@=@");
|
final String[] arr = s.split("@=@");
|
||||||
if (arr.length == 4) {
|
if (arr.length == 4) {
|
||||||
final String vocId = arr[0].trim();
|
final String vocId = arr[0].trim();
|
||||||
|
@ -40,6 +47,19 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
vocs.addTerm(vocId, termId, termName);
|
vocs.addTerm(vocId, termId, termName);
|
||||||
|
vocs.addSynonyms(vocId, termId, termId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (final String s : isLookUpService.quickSearchProfile(VOCABULARY_SYNONYMS_XQUERY)) {
|
||||||
|
final String[] arr = s.split("@=@");
|
||||||
|
if (arr.length == 3) {
|
||||||
|
final String vocId = arr[0].trim();
|
||||||
|
final String termId = arr[1].trim();
|
||||||
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
|
vocs.addSynonyms(vocId, termId, termId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,16 +86,21 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
public Qualifier lookup(String vocId, String id) {
|
||||||
if (StringUtils.isBlank(id)) {
|
return Optional
|
||||||
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId);
|
.ofNullable(getSynonymAsQualifier(vocId, id))
|
||||||
} else if (termExists(vocId, id)) {
|
.orElse(getTermAsQualifier(vocId, id));
|
||||||
final Vocabulary v = vocs.get(vocId.toLowerCase());
|
|
||||||
final VocabularyTerm t = v.getTerm(id);
|
|
||||||
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());
|
|
||||||
} else {
|
|
||||||
return OafMapperUtils.qualifier(id, id, vocId, vocId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Qualifier getTermAsQualifier(final String vocId, final String id) {
|
||||||
|
return vocs.get(vocId.toLowerCase()).getTermAsQualifier(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
|
||||||
|
if (StringUtils.isBlank(vocId)) {
|
||||||
|
return OafMapperUtils.unknown("", "");
|
||||||
|
}
|
||||||
|
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean termExists(final String vocId, final String id) {
|
public boolean termExists(final String vocId, final String id) {
|
||||||
|
@ -86,4 +111,16 @@ public class VocabularyGroup implements Serializable {
|
||||||
return vocs.containsKey(vocId.toLowerCase());
|
return vocs.containsKey(vocId.toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
||||||
|
String id = Optional
|
||||||
|
.ofNullable(vocId)
|
||||||
|
.map(s -> s.toLowerCase())
|
||||||
|
.orElseThrow(
|
||||||
|
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
|
||||||
|
Optional
|
||||||
|
.ofNullable(vocs.get(id))
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
||||||
|
.addSynonym(syn, termId);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
<workflow-app name="fix Graph" xmlns="uri:oozie:workflow:0.5">
|
<workflow-app name="clean graph" xmlns="uri:oozie:workflow:0.5">
|
||||||
|
|
||||||
<parameters>
|
<parameters>
|
||||||
<property>
|
<property>
|
||||||
|
@ -7,7 +7,7 @@
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>graphOutputPath</name>
|
<name>graphOutputPath</name>
|
||||||
<description>the target path to store fixed graph</description>
|
<description>the target path to store cleaned graph</description>
|
||||||
</property>
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>isLookupUrl</name>
|
<name>isLookupUrl</name>
|
||||||
|
@ -50,29 +50,29 @@
|
||||||
</property>
|
</property>
|
||||||
</parameters>
|
</parameters>
|
||||||
|
|
||||||
<start to="fork_fix_graph"/>
|
<start to="fork_clean_graph"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
<fork name="fork_fix_graph">
|
<fork name="fork_clean_graph">
|
||||||
<path start="fix_publication"/>
|
<path start="clean_publication"/>
|
||||||
<path start="fix_dataset"/>
|
<path start="clean_dataset"/>
|
||||||
<path start="fix_otherresearchproduct"/>
|
<path start="clean_otherresearchproduct"/>
|
||||||
<path start="fix_software"/>
|
<path start="clean_software"/>
|
||||||
<path start="fix_datasource"/>
|
<path start="clean_datasource"/>
|
||||||
<path start="fix_organization"/>
|
<path start="clean_organization"/>
|
||||||
<path start="fix_project"/>
|
<path start="clean_project"/>
|
||||||
<path start="fix_relation"/>
|
<path start="clean_relation"/>
|
||||||
</fork>
|
</fork>
|
||||||
|
|
||||||
<action name="fix_publication">
|
<action name="clean_publication">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix publications</name>
|
<name>Clean publications</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -90,16 +90,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_dataset">
|
<action name="clean_dataset">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix datasets</name>
|
<name>Clean datasets</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -117,16 +117,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_otherresearchproduct">
|
<action name="clean_otherresearchproduct">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix otherresearchproducts</name>
|
<name>Clean otherresearchproducts</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -144,16 +144,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_software">
|
<action name="clean_software">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix softwares</name>
|
<name>Clean softwares</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -171,16 +171,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_datasource">
|
<action name="clean_datasource">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix datasources</name>
|
<name>Clean datasources</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -198,16 +198,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_organization">
|
<action name="clean_organization">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix organizations</name>
|
<name>Clean organizations</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -225,16 +225,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_project">
|
<action name="clean_project">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix projects</name>
|
<name>Clean projects</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -252,16 +252,16 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<action name="fix_relation">
|
<action name="clean_relation">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>Fix relations</name>
|
<name>Clean relations</name>
|
||||||
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class>
|
<class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
|
||||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
<spark-opts>
|
<spark-opts>
|
||||||
--executor-cores=${sparkExecutorCoresForJoining}
|
--executor-cores=${sparkExecutorCoresForJoining}
|
||||||
|
@ -279,11 +279,11 @@
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait_fix"/>
|
<ok to="wait_clean"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
<join name="wait_fix" to="End"/>
|
<join name="wait_clean" to="End"/>
|
||||||
|
|
||||||
<end name="End"/>
|
<end name="End"/>
|
||||||
</workflow-app>
|
</workflow-app>
|
|
@ -0,0 +1,6 @@
|
||||||
|
for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')
|
||||||
|
let $vocid := $x//VOCABULARY_NAME/@code
|
||||||
|
let $vocname := $x//VOCABULARY_NAME/text()
|
||||||
|
for $term in ($x//TERM)
|
||||||
|
for $syn in ($term//SYNONYM/@term)
|
||||||
|
return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)
|
|
@ -0,0 +1,71 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
public class CleaningRuleTest {
|
||||||
|
|
||||||
|
public static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
@Mock
|
||||||
|
private ISLookUpService isLookUpService;
|
||||||
|
|
||||||
|
private VocabularyGroup vocabularies;
|
||||||
|
|
||||||
|
private CleaningRule<Publication> cleaningRule;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws ISLookUpException, IOException {
|
||||||
|
lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
|
||||||
|
lenient()
|
||||||
|
.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
|
||||||
|
.thenReturn(synonyms());
|
||||||
|
|
||||||
|
vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
|
||||||
|
cleaningRule = new CleaningRule(vocabularies);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCleaning() throws Exception {
|
||||||
|
|
||||||
|
String json = IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/result.json"));
|
||||||
|
Publication p_in = MAPPER.readValue(json, Publication.class);
|
||||||
|
|
||||||
|
Publication p_out = cleaningRule.call(p_in);
|
||||||
|
|
||||||
|
Assertions.assertNotNull(p_out);
|
||||||
|
|
||||||
|
// TODO add more assertions to verity the cleaned values
|
||||||
|
System.out.println(MAPPER.writeValueAsString(p_out));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> vocs() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> synonyms() throws IOException {
|
||||||
|
return IOUtils
|
||||||
|
.readLines(CleaningRuleTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,691 @@
|
||||||
|
{
|
||||||
|
"author": [
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Brien, Tom",
|
||||||
|
"name": "Tom",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 1,
|
||||||
|
"surname": "Brien"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Ade, Peter",
|
||||||
|
"name": "Peter",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 2,
|
||||||
|
"surname": "Ade"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Barry, Peter S.",
|
||||||
|
"name": "Peter S.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 3,
|
||||||
|
"surname": "Barry"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Dunscombe, Chris J.",
|
||||||
|
"name": "Chris J.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 4,
|
||||||
|
"surname": "Dunscombe"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Leadley, David R.",
|
||||||
|
"name": "David R.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 5,
|
||||||
|
"surname": "Leadley"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Morozov, Dmitry V.",
|
||||||
|
"name": "Dmitry V.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 6,
|
||||||
|
"surname": "Morozov"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Myronov, Maksym",
|
||||||
|
"name": "Maksym",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 7,
|
||||||
|
"surname": "Myronov"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Parker, Evan",
|
||||||
|
"name": "Evan",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 8,
|
||||||
|
"surname": "Parker"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Prest, Martin J.",
|
||||||
|
"name": "Martin J.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 9,
|
||||||
|
"surname": "Prest"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Prunnila, Mika",
|
||||||
|
"name": "Mika",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 10,
|
||||||
|
"surname": "Prunnila"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Sudiwala, Rashmi V.",
|
||||||
|
"name": "Rashmi V.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 11,
|
||||||
|
"surname": "Sudiwala"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Whall, Terry E.",
|
||||||
|
"name": "Terry E.",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 12,
|
||||||
|
"surname": "Whall"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": "Mauskopf",
|
||||||
|
"name": "",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 13,
|
||||||
|
"surname": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"affiliation": [
|
||||||
|
],
|
||||||
|
"fullname": " P. D. ",
|
||||||
|
"name": "",
|
||||||
|
"pid": [
|
||||||
|
],
|
||||||
|
"rank": 14,
|
||||||
|
"surname": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"bestaccessright": {
|
||||||
|
"classid": "CLOSED",
|
||||||
|
"classname": "Closed Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": [
|
||||||
|
{
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"context": [
|
||||||
|
],
|
||||||
|
"contributor": [
|
||||||
|
],
|
||||||
|
"country": [
|
||||||
|
],
|
||||||
|
"coverage": [
|
||||||
|
],
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2016-01-01"
|
||||||
|
},
|
||||||
|
"dateofcollection": "",
|
||||||
|
"dateoftransformation": "2020-04-22T12:34:08.009Z",
|
||||||
|
"description": [
|
||||||
|
],
|
||||||
|
"externalReference": [
|
||||||
|
],
|
||||||
|
"extraInfo": [
|
||||||
|
],
|
||||||
|
"format": [
|
||||||
|
],
|
||||||
|
"fulltext": [
|
||||||
|
],
|
||||||
|
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
|
||||||
|
"instance": [
|
||||||
|
{
|
||||||
|
"accessright": {
|
||||||
|
"classid": "CLOSED",
|
||||||
|
"classname": "Closed Access",
|
||||||
|
"schemeid": "dnet:access_modes",
|
||||||
|
"schemename": "dnet:access_modes"
|
||||||
|
},
|
||||||
|
"collectedfrom": {
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
},
|
||||||
|
"dateofacceptance": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"value": "2016-01-01"
|
||||||
|
},
|
||||||
|
"distributionlocation": "",
|
||||||
|
"hostedby": {
|
||||||
|
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
|
||||||
|
"value": "VIRTA"
|
||||||
|
},
|
||||||
|
"instancetype": {
|
||||||
|
"classid": "Comentario",
|
||||||
|
"classname": "Comentario",
|
||||||
|
"schemeid": "dnet:publication_resource",
|
||||||
|
"schemename": "dnet:publication_resource"
|
||||||
|
},
|
||||||
|
"url": [
|
||||||
|
"http://juuli.fi/Record/0275158616",
|
||||||
|
"http://dx.doi.org/10.1007/s109090161569x"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"journal": {
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"edition": "",
|
||||||
|
"ep": " 7",
|
||||||
|
"iss": "9 March",
|
||||||
|
"issnLinking": "",
|
||||||
|
"issnOnline": "",
|
||||||
|
"issnPrinted": "0022-2291",
|
||||||
|
"name": "Journal of Low Temperature Physics - Early Acces",
|
||||||
|
"sp": "1 ",
|
||||||
|
"vol": ""
|
||||||
|
},
|
||||||
|
"language": {
|
||||||
|
"classid": "en",
|
||||||
|
"classname": "en",
|
||||||
|
"schemeid": "dnet:languages",
|
||||||
|
"schemename": "dnet:languages"
|
||||||
|
},
|
||||||
|
"lastupdatetimestamp": 1591283286319,
|
||||||
|
"oaiprovenance": {
|
||||||
|
"originDescription": {
|
||||||
|
"altered": true,
|
||||||
|
"baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif",
|
||||||
|
"datestamp": "2019-07-30",
|
||||||
|
"harvestDate": "2020-04-22T11:04:38.685Z",
|
||||||
|
"identifier": "oai:virta-jtp.csc.fi:Publications/0275158616",
|
||||||
|
"metadataNamespace": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"originalId": [
|
||||||
|
"CSC_________::2250a70c903c6ac6e4c01438259e9375"
|
||||||
|
],
|
||||||
|
"pid": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "doi",
|
||||||
|
"classname": "doi",
|
||||||
|
"schemeid": "dnet:pid_types",
|
||||||
|
"schemename": "dnet:pid_types"
|
||||||
|
},
|
||||||
|
"value": "10.1007/s109090161569x"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"relevantdate": [
|
||||||
|
],
|
||||||
|
"resourcetype": {
|
||||||
|
"classid": "0001",
|
||||||
|
"classname": "0001",
|
||||||
|
"schemeid": "dnet:dataCite_resource",
|
||||||
|
"schemename": "dnet:dataCite_resource"
|
||||||
|
},
|
||||||
|
"resulttype": {
|
||||||
|
"classid": "publication",
|
||||||
|
"classname": "publication",
|
||||||
|
"schemeid": "dnet:result_typologies",
|
||||||
|
"schemename": "dnet:result_typologies"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
],
|
||||||
|
"subject": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "ta213"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "infrared detectors"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "lens antennas"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "silicon"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "slot antennas"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "strained silicon"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "cold electron bolometers"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "doped silicon"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "measure noise"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "noise equivalent power"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "optical characterisation"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "optical response"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "photon noise"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "",
|
||||||
|
"classname": "",
|
||||||
|
"schemeid": "",
|
||||||
|
"schemename": ""
|
||||||
|
},
|
||||||
|
"value": "silicon absorbers"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": [
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"classname": "sysimport:crosswalk:datasetarchive",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "main title",
|
||||||
|
"classname": "main title",
|
||||||
|
"schemeid": "dnet:dataCite_title",
|
||||||
|
"schemename": "dnet:dataCite_title"
|
||||||
|
},
|
||||||
|
"value": "Optical response of strained- and unstrained-silicon cold-electron bolometers"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue