WIP: graph cleaner implementation

This commit is contained in:
Claudio Atzori 2020-06-09 17:20:40 +02:00
parent 3d871c6651
commit d9f33582c5
16 changed files with 3454 additions and 277 deletions

View File

@ -0,0 +1,119 @@
package eu.dnetlib.dhp.oa.graph.clean;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.lang.reflect.InvocationTargetException;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Predef;
/**
 * Spark job that reads one graph table stored as JSON lines, applies the vocabulary based
 * {@link CleaningRule} to every record and writes the result as parquet.
 *
 * <p>Arguments are described by {@code input_clean_graph_parameters.json}: {@code isSparkSessionManaged},
 * {@code inputPath}, {@code outputPath}, {@code isLookupUrl} and {@code graphTableClassName}.
 */
public class CleanGraphProperties {

    private static final Logger log = LoggerFactory.getLogger(CleanGraphProperties.class);

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Parses the command line, loads the vocabularies from the IS lookup service and runs the cleaning job.
     *
     * @param args the command line arguments
     * @throws Exception when the arguments cannot be parsed, the model class cannot be loaded or is not an
     *                   {@link Oaf} subtype, the vocabularies cannot be loaded, or the Spark job fails
     */
    public static void main(String[] args) throws Exception {

        String jsonConfiguration = IOUtils
            .toString(
                CleanGraphProperties.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/input_clean_graph_parameters.json"));
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("inputPath");
        log.info("inputPath: {}", inputPath);

        String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        String isLookupUrl = parser.get("isLookupUrl");
        log.info("isLookupUrl: {}", isLookupUrl);

        String graphTableClassName = parser.get("graphTableClassName");
        log.info("graphTableClassName: {}", graphTableClassName);

        // Checked conversion: fails fast with a ClassCastException when the configured class is not an Oaf,
        // instead of relying on an unchecked cast that verifies nothing at runtime. The bound is Oaf (what
        // fixGraphTable requires) rather than OafEntity, so every graph table class can be processed.
        Class<? extends Oaf> entityClazz = Class.forName(graphTableClassName).asSubclass(Oaf.class);

        final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
        final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);

        SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                removeOutputDir(spark, outputPath);
                fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
            });
    }

    /**
     * Reads the table at {@code inputPath}, maps every record through a {@link CleaningRule} built on
     * {@code vocs} and writes the outcome as parquet under {@code outputPath}, overwriting existing content.
     */
    private static <T extends Oaf> void fixGraphTable(
        SparkSession spark,
        VocabularyGroup vocs,
        String inputPath,
        Class<T> clazz,
        String outputPath) {

        CleaningRule<T> rule = new CleaningRule<>(vocs);

        readTableFromPath(spark, inputPath, clazz)
            .map(rule, Encoders.bean(clazz))
            .write()
            .mode(SaveMode.Overwrite)
            .parquet(outputPath);
    }

    /**
     * Loads the text file(s) at {@code inputEntityPath} and deserializes each line as a JSON encoded
     * instance of {@code clazz}.
     */
    private static <T extends Oaf> Dataset<T> readTableFromPath(
        SparkSession spark, String inputEntityPath, Class<T> clazz) {

        log.info("Reading Graph table from: {}", inputEntityPath);
        return spark
            .read()
            .textFile(inputEntityPath)
            .map(
                (MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
                Encoders.bean(clazz));
    }

    /** Removes {@code path} using the Hadoop configuration of the given Spark session. */
    private static void removeOutputDir(SparkSession spark, String path) {
        HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
    }
}

View File

@ -0,0 +1,89 @@
package eu.dnetlib.dhp.oa.graph.clean;
import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import org.apache.spark.api.java.function.MapFunction;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
/**
 * Spark {@link MapFunction} that walks an {@link Oaf} object graph via reflection and replaces every
 * {@link Qualifier} field whose scheme id names a known vocabulary with the qualifier returned by
 * {@code VocabularyGroup.lookup(schemeid, classid)}.
 *
 * <p>The input object is modified in place and returned.
 *
 * @param <T> the graph model type being cleaned
 */
public class CleaningRule<T extends Oaf> implements MapFunction<T, T> {

    private final VocabularyGroup vocabularies;

    /**
     * @param vocabularies the vocabularies used to resolve the qualifiers
     */
    public CleaningRule(VocabularyGroup vocabularies) {
        this.vocabularies = vocabularies;
    }

    /**
     * Cleans {@code value} in place.
     *
     * @param value the record to clean, may be null
     * @return the same instance received as input
     * @throws Exception declared by {@link MapFunction}; reflection failures surface as RuntimeException
     */
    @Override
    public T call(T value) throws Exception {
        doClean(value);
        return value;
    }

    /**
     * Recursively visits {@code o}: the elements of an {@link Iterable} are visited one by one, leaf values
     * are ignored, any other object has its fields inspected.
     */
    private void doClean(Object o) {
        if (Objects.isNull(o)) {
            return;
        }
        if (o instanceof Iterable) {
            for (Object oi : (Iterable<?>) o) {
                doClean(oi);
            }
            return;
        }
        if (isLeaf(o)) {
            return;
        }
        try {
            for (Field field : getAllFields(new LinkedList<>(), o.getClass())) {
                field.setAccessible(true);
                Object value = field.get(o);
                if (value instanceof Qualifier) {
                    Qualifier q = (Qualifier) value;
                    final String schemeId = q.getSchemeid();
                    // A qualifier without scheme id cannot match any vocabulary; skipping it also avoids
                    // handing null to vocabularyExists, which appears to lower-case its argument.
                    if (Objects.nonNull(schemeId) && vocabularies.vocabularyExists(schemeId)) {
                        field.set(o, vocabularies.lookup(schemeId, q.getClassid()));
                    }
                } else {
                    doClean(value);
                }
            }
        } catch (IllegalAccessException | IllegalArgumentException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether {@code o} is a value that is never inspected further. A check on
     * {@code o.getClass().isPrimitive()} is not needed: a value held in an Object reference is always boxed.
     */
    private static boolean isLeaf(Object o) {
        return o instanceof Integer
            || o instanceof Double
            || o instanceof Float
            || o instanceof Long
            || o instanceof Boolean
            || o instanceof String;
    }

    /**
     * Collects in {@code fields} the fields declared by {@code clazz} and by its superclasses, climbing the
     * hierarchy only as long as the superclass lives in the same package as {@link Oaf}.
     *
     * @return the {@code fields} list received as input
     */
    private static List<Field> getAllFields(List<Field> fields, Class<?> clazz) {
        fields.addAll(Arrays.asList(clazz.getDeclaredFields()));

        final Class<?> superclass = clazz.getSuperclass();
        // Objects.equals because Class.getPackage() may return null
        if (Objects.nonNull(superclass) && Objects.equals(superclass.getPackage(), Oaf.class.getPackage())) {
            getAllFields(fields, superclass);
        }
        return fields;
    }
}

View File

@ -1,210 +0,0 @@
package eu.dnetlib.dhp.oa.graph.fix;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
 * Spark job that reads one graph table stored as JSON lines, applies a type specific fixing function that
 * normalises qualifiers against the vocabularies, and writes the result as parquet.
 *
 * <p>Arguments are described by {@code input_fix_graph_parameters.json}: {@code isSparkSessionManaged},
 * {@code inputPath}, {@code outputPath}, {@code isLookupUrl} and {@code graphTableClassName}.
 */
public class FixGraphProperties {

    private static final Logger log = LoggerFactory.getLogger(FixGraphProperties.class);

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Parses the command line, loads the vocabularies and runs the fixing job.
     *
     * @param args the command line arguments
     * @throws Exception when the arguments cannot be parsed, the model class cannot be loaded or is not an
     *                   {@link Oaf} subtype, the vocabularies cannot be loaded, or the Spark job fails
     */
    public static void main(String[] args) throws Exception {

        String jsonConfiguration = IOUtils
            .toString(
                FixGraphProperties.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/input_fix_graph_parameters.json"));
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputPath = parser.get("inputPath");
        log.info("inputPath: {}", inputPath);

        String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        String isLookupUrl = parser.get("isLookupUrl");
        log.info("isLookupUrl: {}", isLookupUrl);

        String graphTableClassName = parser.get("graphTableClassName");
        log.info("graphTableClassName: {}", graphTableClassName);

        // Checked conversion bounded by Oaf: getFixingFunction explicitly supports Relation as well, and an
        // unchecked cast to Class<? extends OafEntity> would verify nothing at runtime.
        Class<? extends Oaf> entityClazz = Class.forName(graphTableClassName).asSubclass(Oaf.class);

        final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl);

        SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                removeOutputDir(spark, outputPath);
                fixGraphTable(spark, vocs, inputPath, entityClazz, outputPath);
            });
    }

    /**
     * Reads the table at {@code inputPath}, maps every record through the fixing function selected for
     * {@code clazz} and writes the outcome as parquet under {@code outputPath}, overwriting existing content.
     */
    private static <T extends Oaf> void fixGraphTable(
        SparkSession spark,
        VocabularyGroup vocs,
        String inputPath,
        Class<T> clazz,
        String outputPath) {

        MapFunction<T, T> fixFn = getFixingFunction(vocs, clazz);

        readTableFromPath(spark, inputPath, clazz)
            .map(fixFn, Encoders.bean(clazz))
            .write()
            .mode(SaveMode.Overwrite)
            .parquet(outputPath);
    }

    /**
     * Selects the fixing function by the canonical name of {@code clazz}: result subtypes and organizations
     * are normalised, datasources, projects and relations are returned unchanged.
     *
     * @throws RuntimeException when {@code clazz} is none of the supported model classes
     */
    private static <T extends Oaf> MapFunction<T, T> getFixingFunction(VocabularyGroup vocs, Class<T> clazz) {

        switch (clazz.getCanonicalName()) {
            case "eu.dnetlib.dhp.schema.oaf.Publication":
            case "eu.dnetlib.dhp.schema.oaf.Dataset":
            case "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct":
            case "eu.dnetlib.dhp.schema.oaf.Software":
                return (MapFunction<T, T>) value -> clazz.cast(fixResult(vocs, (Result) value));
            case "eu.dnetlib.dhp.schema.oaf.Organization":
                return (MapFunction<T, T>) value -> clazz.cast(fixOrganization(vocs, (Organization) value));
            case "eu.dnetlib.dhp.schema.oaf.Datasource":
            case "eu.dnetlib.dhp.schema.oaf.Project":
            case "eu.dnetlib.dhp.schema.oaf.Relation":
                return (MapFunction<T, T>) value -> value;
            default:
                throw new RuntimeException("unknown class: " + clazz.getCanonicalName());
        }
    }

    /** Normalises language, countries, subjects, publisher, access rights and instance types of {@code r}. */
    private static Result fixResult(VocabularyGroup vocs, Result r) {

        // a missing language becomes "und"; an existing one is resolved through the vocabulary
        if (r.getLanguage() == null) {
            r.setLanguage(vocs.getTermAsQualifier("dnet:languages", "und"));
        } else {
            r.setLanguage(vocs.getTermAsQualifier("dnet:languages", r.getLanguage().getClassid()));
        }

        if (r.getCountry() != null) {
            r
                .setCountry(
                    r
                        .getCountry()
                        .stream()
                        .filter(Objects::nonNull)
                        .map(c -> {
                            Qualifier q = vocs.getTermAsQualifier("dnet:countries", c.getClassid());
                            Country cn = new Country();
                            cn.setDataInfo(c.getDataInfo());
                            cn.setClassid(q.getClassid());
                            // the class name comes from the resolved qualifier, cn is still empty here
                            cn.setClassname(q.getClassname());
                            cn.setSchemeid("dnet:countries");
                            cn.setSchemename("dnet:countries");
                            return cn;
                        })
                        .collect(Collectors.toList()));
        }

        if (r.getSubject() != null) {
            r
                .setSubject(
                    r
                        .getSubject()
                        .stream()
                        .filter(Objects::nonNull)
                        .map(s -> {
                            if (s.getQualifier() == null || StringUtils.isBlank(s.getQualifier().getClassid())) {
                                s
                                    .setQualifier(
                                        vocs.getTermAsQualifier("dnet:subject_classification_typologies", "UNKNOWN"));
                            }
                            return s;
                        })
                        .collect(Collectors.toList()));
        }

        if (r.getPublisher() != null && StringUtils.isBlank(r.getPublisher().getValue())) {
            r.setPublisher(null);
        }

        if (r.getBestaccessright() == null) {
            r.setBestaccessright(vocs.getTermAsQualifier("dnet:access_modes", "UNKNOWN"));
        }

        if (r.getInstance() != null) {
            for (Instance i : r.getInstance()) {
                if (i.getAccessright() == null) {
                    i.setAccessright(vocs.getTermAsQualifier("dnet:access_modes", "UNKNOWN"));
                }
                if (i.getInstancetype() != null) {
                    i
                        .setInstancetype(
                            vocs.getTermAsQualifier("dnet:publication_resource", i.getInstancetype().getClassid()));
                } else {
                    i.setInstancetype(vocs.getTermAsQualifier("dnet:publication_resource", "0000"));
                }
            }
        }
        return r;
    }

    /** Resolves the country of {@code o} through the vocabulary, using "UNKNOWN" when it is missing. */
    private static Organization fixOrganization(VocabularyGroup vocs, Organization o) {
        if (o.getCountry() == null) {
            o.setCountry(vocs.getTermAsQualifier("dnet:countries", "UNKNOWN"));
        } else {
            o.setCountry(vocs.getTermAsQualifier("dnet:countries", o.getCountry().getClassid()));
        }
        return o;
    }

    /**
     * Loads the text file(s) at {@code inputEntityPath} and deserializes each line as a JSON encoded
     * instance of {@code clazz}.
     */
    private static <T extends Oaf> Dataset<T> readTableFromPath(
        SparkSession spark, String inputEntityPath, Class<T> clazz) {

        log.info("Reading Graph table from: {}", inputEntityPath);
        return spark
            .read()
            .textFile(inputEntityPath)
            .map(
                (MapFunction<String, T>) value -> OBJECT_MAPPER.readValue(value, clazz),
                Encoders.bean(clazz));
    }

    /** Removes {@code path} using the Hadoop configuration of the given Spark session. */
    private static void removeOutputDir(SparkSession spark, String path) {
        HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
    }
}

View File

@ -39,6 +39,8 @@ import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2; import scala.Tuple2;
public class GenerateEntitiesApplication { public class GenerateEntitiesApplication {
@ -71,7 +73,8 @@ public class GenerateEntitiesApplication {
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl); log.info("isLookupUrl: {}", isLookupUrl);
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); final ISLookUpService isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl);
final VocabularyGroup vocs = VocabularyGroup.loadVocsFromIS(isLookupService);
final SparkConf conf = new SparkConf(); final SparkConf conf = new SparkConf();
runWithSparkSession(conf, isSparkSessionManaged, spark -> { runWithSparkSession(conf, isSparkSessionManaged, spark -> {

View File

@ -71,6 +71,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable { public class MigrateDbEntitiesApplication extends AbstractMigrationApplication implements Closeable {
@ -151,7 +152,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
super(hdfsPath); super(hdfsPath);
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.lastUpdateTimestamp = new Date().getTime(); this.lastUpdateTimestamp = new Date().getTime();
this.vocs = VocabularyGroup.loadVocsFromIS(isLookupUrl); this.vocs = VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl));
} }
public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer) public void execute(final String sqlFile, final Function<ResultSet, List<Oaf>> producer)

View File

@ -60,6 +60,10 @@ public class OafMapperUtils {
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
/**
 * Builds the placeholder qualifier for the given scheme: class id {@code UNKNOWN}, class name
 * {@code Unknown}.
 *
 * @param schemeid   the scheme id to set on the qualifier
 * @param schemename the scheme name to set on the qualifier
 * @return the qualifier created by {@code qualifier(...)}
 */
public static Qualifier unknown(final String schemeid, final String schemename) {
    final String classid = "UNKNOWN";
    final String classname = "Unknown";
    return qualifier(classid, classname, schemeid, schemename);
}
public static Qualifier qualifier( public static Qualifier qualifier(
final String classid, final String classid,
final String classname, final String classname,

View File

@ -4,14 +4,29 @@ package eu.dnetlib.dhp.oa.graph.raw.common;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
public class Vocabulary implements Serializable { public class Vocabulary implements Serializable {
private final String id; private final String id;
private final String name; private final String name;
/**
* Code to Term mappings for this Vocabulary.
*/
private final Map<String, VocabularyTerm> terms = new HashMap<>(); private final Map<String, VocabularyTerm> terms = new HashMap<>();
/**
* Synonym to Code mappings for this Vocabulary.
*/
private final Map<String, String> synonyms = Maps.newHashMap();
public Vocabulary(final String id, final String name) { public Vocabulary(final String id, final String name) {
this.id = id; this.id = id;
this.name = name; this.name = name;
@ -30,7 +45,7 @@ public class Vocabulary implements Serializable {
} }
public VocabularyTerm getTerm(final String id) { public VocabularyTerm getTerm(final String id) {
return terms.get(id.toLowerCase()); return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
} }
protected void addTerm(final String id, final String name) { protected void addTerm(final String id, final String name) {
@ -40,4 +55,32 @@ public class Vocabulary implements Serializable {
protected boolean termExists(final String id) { protected boolean termExists(final String id) {
return terms.containsKey(id.toLowerCase()); return terms.containsKey(id.toLowerCase());
} }
/**
 * Registers {@code syn} as a synonym of the term identified by {@code termCode}. The term code is stored
 * lower-cased, the synonym is stored exactly as given.
 *
 * @param syn      the synonym, used as map key
 * @param termCode the code of the term the synonym points to, must not be null
 */
protected void addSynonym(final String syn, final String termCode) {
    final String normalizedCode = termCode.toLowerCase();
    synonyms.put(syn, normalizedCode);
}
/**
 * Resolves a synonym to its term: the code registered for {@code syn} is looked up in the synonym map and
 * handed to {@code getTerm}.
 *
 * @param syn the synonym to resolve
 * @return whatever {@code getTerm} yields for the mapped code (presumably null when the synonym is not
 *         registered - verify against getTerm)
 */
public VocabularyTerm getTermBySynonym(final String syn) {
    final String termCode = synonyms.get(syn);
    return getTerm(termCode);
}
/**
 * Expresses a term of this vocabulary as a {@link Qualifier} whose scheme is this vocabulary.
 *
 * <ul>
 * <li>blank {@code termId}: the "unknown" qualifier for this vocabulary;</li>
 * <li>{@code termId} not in this vocabulary: a qualifier using {@code termId} as both class id and name;</li>
 * <li>otherwise: a qualifier carrying the id and the name of the matching term.</li>
 * </ul>
 *
 * @param termId the term identifier, may be blank
 * @return the qualifier, as described above
 */
public Qualifier getTermAsQualifier(final String termId) {
    if (StringUtils.isBlank(termId)) {
        return OafMapperUtils.unknown(getId(), getName());
    }
    if (!termExists(termId)) {
        return OafMapperUtils.qualifier(termId, termId, getId(), getName());
    }
    final VocabularyTerm term = getTerm(termId);
    return OafMapperUtils.qualifier(term.getId(), term.getName(), getId(), getName());
}
/**
 * Resolves a synonym to the {@link Qualifier} of the term it points to.
 *
 * @param syn the synonym to resolve
 * @return the qualifier of the matching term, or {@code null} when no term is found for the synonym (the
 *         "unknown" qualifier is deliberately not used as a fallback here)
 */
public Qualifier getSynonymAsQualifier(final String syn) {
    final VocabularyTerm term = getTermBySynonym(syn);
    if (term == null) {
        return null;
    }
    return getTermAsQualifier(term.getId());
}
} }

View File

@ -1,33 +1,40 @@
package eu.dnetlib.dhp.oa.graph.raw.common; package eu.dnetlib.dhp.oa.graph.raw.common;
import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class VocabularyGroup implements Serializable { public class VocabularyGroup implements Serializable {
public static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException { public static final String VOCABULARIES_XQUERY = "for $x in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType') \n"
final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl); +
"let $vocid := $x//VOCABULARY_NAME/@code\n" +
"let $vocname := $x//VOCABULARY_NAME/text()\n" +
"for $term in ($x//TERM)\n" +
"return concat($vocid,' @=@ ',$vocname,' @=@ ',$term/@code,' @=@ ',$term/@english_name)";
final String xquery = IOUtils public static final String VOCABULARY_SYNONYMS_XQUERY = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')\n"
.toString( +
GenerateEntitiesApplication.class "let $vocid := $x//VOCABULARY_NAME/@code\n" +
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery")); "let $vocname := $x//VOCABULARY_NAME/text()\n" +
"for $term in ($x//TERM)\n" +
"for $syn in ($term//SYNONYM/@term)\n" +
"return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)\n";
public static VocabularyGroup loadVocsFromIS(ISLookUpService isLookUpService) throws ISLookUpException {
final VocabularyGroup vocs = new VocabularyGroup(); final VocabularyGroup vocs = new VocabularyGroup();
for (final String s : isLookUpService.quickSearchProfile(xquery)) { for (final String s : isLookUpService.quickSearchProfile(VOCABULARIES_XQUERY)) {
final String[] arr = s.split("@=@"); final String[] arr = s.split("@=@");
if (arr.length == 4) { if (arr.length == 4) {
final String vocId = arr[0].trim(); final String vocId = arr[0].trim();
@ -40,6 +47,19 @@ public class VocabularyGroup implements Serializable {
} }
vocs.addTerm(vocId, termId, termName); vocs.addTerm(vocId, termId, termName);
vocs.addSynonyms(vocId, termId, termId);
}
}
for (final String s : isLookUpService.quickSearchProfile(VOCABULARY_SYNONYMS_XQUERY)) {
final String[] arr = s.split("@=@");
if (arr.length == 3) {
final String vocId = arr[0].trim();
final String termId = arr[1].trim();
final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn);
vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -66,16 +86,21 @@ public class VocabularyGroup implements Serializable {
} }
} }
public Qualifier getTermAsQualifier(final String vocId, final String id) { public Qualifier lookup(String vocId, String id) {
if (StringUtils.isBlank(id)) { return Optional
return OafMapperUtils.qualifier("UNKNOWN", "UNKNOWN", vocId, vocId); .ofNullable(getSynonymAsQualifier(vocId, id))
} else if (termExists(vocId, id)) { .orElse(getTermAsQualifier(vocId, id));
final Vocabulary v = vocs.get(vocId.toLowerCase());
final VocabularyTerm t = v.getTerm(id);
return OafMapperUtils.qualifier(t.getId(), t.getName(), v.getId(), v.getName());
} else {
return OafMapperUtils.qualifier(id, id, vocId, vocId);
} }
/**
 * Expresses the term {@code id} of the vocabulary {@code vocId} as a {@link Qualifier}.
 *
 * <p>When the vocabulary is known the call is delegated to it. When {@code vocId} is null or does not name
 * a loaded vocabulary, no exception is raised: a blank {@code id} yields the "unknown" qualifier and any
 * other {@code id} yields a qualifier using {@code id} as class id and name, both with {@code vocId} as
 * scheme id and name.
 *
 * @param vocId the vocabulary identifier, matched case-insensitively
 * @param id    the term identifier, may be blank
 * @return the qualifier, never obtained by dereferencing a missing vocabulary
 */
public Qualifier getTermAsQualifier(final String vocId, final String id) {
    final Vocabulary voc = Optional
        .ofNullable(vocId)
        .map(String::toLowerCase)
        .map(vocs::get)
        .orElse(null);
    if (voc != null) {
        return voc.getTermAsQualifier(id);
    }
    // Missing vocabulary: degrade gracefully instead of failing on vocs.get(...) returning null.
    if (StringUtils.isBlank(id)) {
        return OafMapperUtils.unknown(vocId, vocId);
    }
    return OafMapperUtils.qualifier(id, id, vocId, vocId);
}
public Qualifier getSynonymAsQualifier(final String vocId, final String syn) {
if (StringUtils.isBlank(vocId)) {
return OafMapperUtils.unknown("", "");
}
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
} }
public boolean termExists(final String vocId, final String id) { public boolean termExists(final String vocId, final String id) {
@ -86,4 +111,16 @@ public class VocabularyGroup implements Serializable {
return vocs.containsKey(vocId.toLowerCase()); return vocs.containsKey(vocId.toLowerCase());
} }
/**
 * Registers {@code syn} as a synonym of the term {@code termId} in the vocabulary {@code vocId}.
 *
 * @param vocId  the vocabulary identifier, matched lower-cased
 * @param termId the code of the term the synonym points to
 * @param syn    the synonym to register
 * @throws IllegalArgumentException when {@code vocId} is null or does not name a loaded vocabulary
 */
private void addSynonyms(final String vocId, final String termId, final String syn) {
    final String id = Optional
        .ofNullable(vocId)
        .map(String::toLowerCase)
        .orElseThrow(
            // the format arguments are required: without them String.format itself throws
            // MissingFormatArgumentException and the intended error is never reported
            () -> new IllegalArgumentException(
                String.format("empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
    Optional
        .ofNullable(vocs.get(id))
        .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
        .addSynonym(syn, termId);
}
} }

View File

@ -1,4 +1,4 @@
<workflow-app name="fix Graph" xmlns="uri:oozie:workflow:0.5"> <workflow-app name="clean graph" xmlns="uri:oozie:workflow:0.5">
<parameters> <parameters>
<property> <property>
@ -7,7 +7,7 @@
</property> </property>
<property> <property>
<name>graphOutputPath</name> <name>graphOutputPath</name>
<description>the target path to store fixed graph</description> <description>the target path to store cleaned graph</description>
</property> </property>
<property> <property>
<name>isLookupUrl</name> <name>isLookupUrl</name>
@ -50,29 +50,29 @@
</property> </property>
</parameters> </parameters>
<start to="fork_fix_graph"/> <start to="fork_clean_graph"/>
<kill name="Kill"> <kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill> </kill>
<fork name="fork_fix_graph"> <fork name="fork_clean_graph">
<path start="fix_publication"/> <path start="clean_publication"/>
<path start="fix_dataset"/> <path start="clean_dataset"/>
<path start="fix_otherresearchproduct"/> <path start="clean_otherresearchproduct"/>
<path start="fix_software"/> <path start="clean_software"/>
<path start="fix_datasource"/> <path start="clean_datasource"/>
<path start="fix_organization"/> <path start="clean_organization"/>
<path start="fix_project"/> <path start="clean_project"/>
<path start="fix_relation"/> <path start="clean_relation"/>
</fork> </fork>
<action name="fix_publication"> <action name="clean_publication">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix publications</name> <name>Clean publications</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -90,16 +90,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_dataset"> <action name="clean_dataset">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix datasets</name> <name>Clean datasets</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -117,16 +117,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_otherresearchproduct"> <action name="clean_otherresearchproduct">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix otherresearchproducts</name> <name>Clean otherresearchproducts</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -144,16 +144,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_software"> <action name="clean_software">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix softwares</name> <name>Clean softwares</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -171,16 +171,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_datasource"> <action name="clean_datasource">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix datasources</name> <name>Clean datasources</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -198,16 +198,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Datasource</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_organization"> <action name="clean_organization">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix organizations</name> <name>Clean organizations</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -225,16 +225,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Organization</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_project"> <action name="clean_project">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix projects</name> <name>Clean projects</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -252,16 +252,16 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Project</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<action name="fix_relation"> <action name="clean_relation">
<spark xmlns="uri:oozie:spark-action:0.2"> <spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master> <master>yarn</master>
<mode>cluster</mode> <mode>cluster</mode>
<name>Fix relations</name> <name>Clean relations</name>
<class>eu.dnetlib.dhp.oa.graph.fix.FixGraphProperties</class> <class>eu.dnetlib.dhp.oa.graph.clean.CleanGraphProperties</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar> <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts> <spark-opts>
--executor-cores=${sparkExecutorCoresForJoining} --executor-cores=${sparkExecutorCoresForJoining}
@ -279,11 +279,11 @@
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg> <arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg> <arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
</spark> </spark>
<ok to="wait_fix"/> <ok to="wait_clean"/>
<error to="Kill"/> <error to="Kill"/>
</action> </action>
<join name="wait_fix" to="End"/> <join name="wait_clean" to="End"/>
<end name="End"/> <end name="End"/>
</workflow-app> </workflow-app>

View File

@ -0,0 +1,6 @@
(: Emits one string per vocabulary synonym, formatted as
   <vocabulary code> @=@ <term code> @=@ <synonym>,
   by walking every TERM (and its nested SYNONYM/@term attributes) of each
   vocabulary resource in the collection below. :)
for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')
let $vocid := $x//VOCABULARY_NAME/@code
for $term in ($x//TERM)
for $syn in ($term//SYNONYM/@term)
return concat($vocid,' @=@ ',$term/@code,' @=@ ', $syn)

View File

@ -0,0 +1,71 @@
package eu.dnetlib.dhp.oa.graph.clean;
import static org.mockito.Mockito.lenient;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
 * Exercises {@link CleaningRule} on a {@link Publication} read from a JSON fixture.
 *
 * <p>The {@link ISLookUpService} is mocked: its vocabulary and synonym queries are answered
 * with the lines of the {@code terms.txt} and {@code synonyms.txt} classpath resources, and
 * the resulting {@link VocabularyGroup} is handed to the rule under test.
 */
@ExtendWith(MockitoExtension.class)
public class CleaningRuleTest {

	public static final ObjectMapper MAPPER = new ObjectMapper();

	private static final String RESOURCE_DIR = "/eu/dnetlib/dhp/oa/graph/clean/";

	@Mock
	private ISLookUpService isLookUpService;

	private VocabularyGroup vocabularies;

	private CleaningRule<Publication> cleaningRule;

	/**
	 * Stubs the lookup service with the local fixtures and builds the rule under test.
	 *
	 * @throws ISLookUpException declared by the mocked lookup calls
	 * @throws IOException if a fixture file is missing or cannot be read
	 */
	@BeforeEach
	public void setUp() throws ISLookUpException, IOException {
		// lenient(): not every test is required to trigger both lookups
		lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs());
		lenient()
			.when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY))
			.thenReturn(synonyms());

		vocabularies = VocabularyGroup.loadVocsFromIS(isLookUpService);
		// diamond instead of the raw type, so the assignment is checked by the compiler
		cleaningRule = new CleaningRule<>(vocabularies);
	}

	/**
	 * Runs the rule on the {@code result.json} fixture and checks that a result is produced.
	 */
	@Test
	public void testCleaning() throws Exception {
		final String json;
		try (InputStream in = openResource("result.json")) {
			json = IOUtils.toString(in, StandardCharsets.UTF_8);
		}
		Publication p_in = MAPPER.readValue(json, Publication.class);

		Publication p_out = cleaningRule.call(p_in);

		Assertions.assertNotNull(p_out);

		// TODO add more assertions to verify the cleaned values
		System.out.println(MAPPER.writeValueAsString(p_out));
	}

	/** Lines of the {@code terms.txt} fixture, returned for the vocabularies query. */
	private List<String> vocs() throws IOException {
		return readResourceLines("terms.txt");
	}

	/** Lines of the {@code synonyms.txt} fixture, returned for the synonyms query. */
	private List<String> synonyms() throws IOException {
		return readResourceLines("synonyms.txt");
	}

	/** Reads a fixture from {@link #RESOURCE_DIR} line by line as UTF-8, closing the stream afterwards. */
	private static List<String> readResourceLines(final String fileName) throws IOException {
		try (InputStream in = openResource(fileName)) {
			return IOUtils.readLines(in, StandardCharsets.UTF_8);
		}
	}

	/**
	 * Opens a fixture from {@link #RESOURCE_DIR}.
	 *
	 * @throws IOException if the resource is not on the classpath, so the failure names the
	 *         missing file instead of surfacing later as a NullPointerException
	 */
	private static InputStream openResource(final String fileName) throws IOException {
		final String path = RESOURCE_DIR + fileName;
		final InputStream in = CleaningRuleTest.class.getResourceAsStream(path);
		if (in == null) {
			throw new IOException("test resource not found on classpath: " + path);
		}
		return in;
	}
}

View File

@ -0,0 +1,691 @@
{
"author": [
{
"affiliation": [
],
"fullname": "Brien, Tom",
"name": "Tom",
"pid": [
],
"rank": 1,
"surname": "Brien"
},
{
"affiliation": [
],
"fullname": "Ade, Peter",
"name": "Peter",
"pid": [
],
"rank": 2,
"surname": "Ade"
},
{
"affiliation": [
],
"fullname": "Barry, Peter S.",
"name": "Peter S.",
"pid": [
],
"rank": 3,
"surname": "Barry"
},
{
"affiliation": [
],
"fullname": "Dunscombe, Chris J.",
"name": "Chris J.",
"pid": [
],
"rank": 4,
"surname": "Dunscombe"
},
{
"affiliation": [
],
"fullname": "Leadley, David R.",
"name": "David R.",
"pid": [
],
"rank": 5,
"surname": "Leadley"
},
{
"affiliation": [
],
"fullname": "Morozov, Dmitry V.",
"name": "Dmitry V.",
"pid": [
],
"rank": 6,
"surname": "Morozov"
},
{
"affiliation": [
],
"fullname": "Myronov, Maksym",
"name": "Maksym",
"pid": [
],
"rank": 7,
"surname": "Myronov"
},
{
"affiliation": [
],
"fullname": "Parker, Evan",
"name": "Evan",
"pid": [
],
"rank": 8,
"surname": "Parker"
},
{
"affiliation": [
],
"fullname": "Prest, Martin J.",
"name": "Martin J.",
"pid": [
],
"rank": 9,
"surname": "Prest"
},
{
"affiliation": [
],
"fullname": "Prunnila, Mika",
"name": "Mika",
"pid": [
],
"rank": 10,
"surname": "Prunnila"
},
{
"affiliation": [
],
"fullname": "Sudiwala, Rashmi V.",
"name": "Rashmi V.",
"pid": [
],
"rank": 11,
"surname": "Sudiwala"
},
{
"affiliation": [
],
"fullname": "Whall, Terry E.",
"name": "Terry E.",
"pid": [
],
"rank": 12,
"surname": "Whall"
},
{
"affiliation": [
],
"fullname": "Mauskopf",
"name": "",
"pid": [
],
"rank": 13,
"surname": ""
},
{
"affiliation": [
],
"fullname": " P. D. ",
"name": "",
"pid": [
],
"rank": 14,
"surname": ""
}
],
"bestaccessright": {
"classid": "CLOSED",
"classname": "Closed Access",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"collectedfrom": [
{
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
"value": "VIRTA"
}
],
"context": [
],
"contributor": [
],
"country": [
],
"coverage": [
],
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"dateofacceptance": {
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"value": "2016-01-01"
},
"dateofcollection": "",
"dateoftransformation": "2020-04-22T12:34:08.009Z",
"description": [
],
"externalReference": [
],
"extraInfo": [
],
"format": [
],
"fulltext": [
],
"id": "50|CSC_________::2250a70c903c6ac6e4c01438259e9375",
"instance": [
{
"accessright": {
"classid": "CLOSED",
"classname": "Closed Access",
"schemeid": "dnet:access_modes",
"schemename": "dnet:access_modes"
},
"collectedfrom": {
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
"value": "VIRTA"
},
"dateofacceptance": {
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"value": "2016-01-01"
},
"distributionlocation": "",
"hostedby": {
"key": "10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747",
"value": "VIRTA"
},
"instancetype": {
"classid": "Comentario",
"classname": "Comentario",
"schemeid": "dnet:publication_resource",
"schemename": "dnet:publication_resource"
},
"url": [
"http://juuli.fi/Record/0275158616",
"http://dx.doi.org/10.1007/s109090161569x"
]
}
],
"journal": {
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"edition": "",
"ep": " 7",
"iss": "9 March",
"issnLinking": "",
"issnOnline": "",
"issnPrinted": "0022-2291",
"name": "Journal of Low Temperature Physics - Early Acces",
"sp": "1 ",
"vol": ""
},
"language": {
"classid": "en",
"classname": "en",
"schemeid": "dnet:languages",
"schemename": "dnet:languages"
},
"lastupdatetimestamp": 1591283286319,
"oaiprovenance": {
"originDescription": {
"altered": true,
"baseURL": "https%3A%2F%2Fvirta-jtp.csc.fi%2Fapi%2Fcerif",
"datestamp": "2019-07-30",
"harvestDate": "2020-04-22T11:04:38.685Z",
"identifier": "oai:virta-jtp.csc.fi:Publications/0275158616",
"metadataNamespace": ""
}
},
"originalId": [
"CSC_________::2250a70c903c6ac6e4c01438259e9375"
],
"pid": [
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1007/s109090161569x"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "doi",
"classname": "doi",
"schemeid": "dnet:pid_types",
"schemename": "dnet:pid_types"
},
"value": "10.1007/s109090161569x"
}
],
"relevantdate": [
],
"resourcetype": {
"classid": "0001",
"classname": "0001",
"schemeid": "dnet:dataCite_resource",
"schemename": "dnet:dataCite_resource"
},
"resulttype": {
"classid": "publication",
"classname": "publication",
"schemeid": "dnet:result_typologies",
"schemename": "dnet:result_typologies"
},
"source": [
],
"subject": [
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "ta213"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "infrared detectors"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "lens antennas"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "silicon"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "slot antennas"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "strained silicon"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "cold electron bolometers"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "doped silicon"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "measure noise"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "noise equivalent power"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "optical characterisation"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "optical response"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "photon noise"
},
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "",
"classname": "",
"schemeid": "",
"schemename": ""
},
"value": "silicon absorbers"
}
],
"title": [
{
"dataInfo": {
"deletedbyinference": false,
"inferenceprovenance": "",
"inferred": false,
"invisible": false,
"provenanceaction": {
"classid": "sysimport:crosswalk:datasetarchive",
"classname": "sysimport:crosswalk:datasetarchive",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
},
"trust": "0.9"
},
"qualifier": {
"classid": "main title",
"classname": "main title",
"schemeid": "dnet:dataCite_title",
"schemename": "dnet:dataCite_title"
},
"value": "Optical response of strained- and unstrained-silicon cold-electron bolometers"
}
]
}