forked from D-Net/dnet-hadoop
Merge pull request 'subjects cleaning' (#239) from clean_subjects into beta
Reviewed-on: D-Net/dnet-hadoop#239
This commit is contained in:
commit
5066db3386
|
@ -83,4 +83,10 @@ public class Vocabulary implements Serializable {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Qualifier lookup(String id) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(getSynonymAsQualifier(id))
|
||||||
|
.orElse(getTermAsQualifier(id));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,6 +81,13 @@ public class VocabularyGroup implements Serializable {
|
||||||
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
|
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Optional<Vocabulary> find(final String vocId) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(vocId)
|
||||||
|
.map(String::toLowerCase)
|
||||||
|
.map(vocs::get);
|
||||||
|
}
|
||||||
|
|
||||||
public void addTerm(final String vocId, final String id, final String name) {
|
public void addTerm(final String vocId, final String id, final String name) {
|
||||||
if (vocabularyExists(vocId)) {
|
if (vocabularyExists(vocId)) {
|
||||||
vocs.get(vocId.toLowerCase()).addTerm(id, name);
|
vocs.get(vocId.toLowerCase()).addTerm(id, name);
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf.utils;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
|
||||||
|
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
|
@ -16,7 +18,6 @@ import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.Vocabulary;
|
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
|
@ -191,8 +192,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
|
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getSubject())) {
|
if (Objects.nonNull(r.getSubject())) {
|
||||||
r
|
List<Subject> subjects = Lists
|
||||||
.setSubject(
|
.newArrayList(
|
||||||
r
|
r
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.stream()
|
.stream()
|
||||||
|
@ -201,7 +202,18 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
||||||
.map(GraphCleaningFunctions::cleanValue)
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
.collect(Collectors.toList()));
|
.collect(
|
||||||
|
Collectors
|
||||||
|
.toMap(
|
||||||
|
s -> Optional
|
||||||
|
.ofNullable(s.getQualifier())
|
||||||
|
.map(q -> q.getClassid() + s.getValue())
|
||||||
|
.orElse(s.getValue()),
|
||||||
|
Function.identity(),
|
||||||
|
// two subjects collapsed onto the same key: keep the one that the provenance
// comparator ranks first (both candidates must be compared, s1 against s2)
(s1, s2) -> Collections
.min(Lists.newArrayList(s1, s2), new SubjectProvenanceComparator())))
|
||||||
|
.values());
|
||||||
|
r.setSubject(subjects);
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getTitle())) {
|
if (Objects.nonNull(r.getTitle())) {
|
||||||
r
|
r
|
||||||
|
@ -382,14 +394,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
.filter(p -> StringUtils.isNotBlank(p.getValue()))
|
.filter(p -> StringUtils.isNotBlank(p.getValue()))
|
||||||
.map(p -> {
|
.map(p -> {
|
||||||
// hack to distinguish orcid from orcid_pending
|
// hack to distinguish orcid from orcid_pending
|
||||||
String pidProvenance = Optional
|
String pidProvenance = getProvenance(p.getDataInfo());
|
||||||
.ofNullable(p.getDataInfo())
|
|
||||||
.map(
|
|
||||||
d -> Optional
|
|
||||||
.ofNullable(d.getProvenanceaction())
|
|
||||||
.map(Qualifier::getClassid)
|
|
||||||
.orElse(""))
|
|
||||||
.orElse("");
|
|
||||||
if (p
|
if (p
|
||||||
.getQualifier()
|
.getQualifier()
|
||||||
.getClassid()
|
.getClassid()
|
||||||
|
@ -520,6 +525,11 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static Subject cleanValue(Subject s) {
|
||||||
|
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
protected static Field<String> cleanValue(Field<String> s) {
|
protected static Field<String> cleanValue(Field<String> s) {
|
||||||
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
|
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
|
||||||
return s;
|
return s;
|
||||||
|
|
|
@ -14,6 +14,7 @@ import java.util.stream.Collectors;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
|
@ -141,7 +142,7 @@ public class OafMapperUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Qualifier unknown(final String schemeid, final String schemename) {
|
public static Qualifier unknown(final String schemeid, final String schemename) {
|
||||||
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
return qualifier(UNKNOWN, "Unknown", schemeid, schemename);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static AccessRight accessRight(
|
public static AccessRight accessRight(
|
||||||
|
@ -189,6 +190,17 @@ public class OafMapperUtils {
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Subject subject(
|
||||||
|
final String value,
|
||||||
|
final String classid,
|
||||||
|
final String classname,
|
||||||
|
final String schemeid,
|
||||||
|
final String schemename,
|
||||||
|
final DataInfo dataInfo) {
|
||||||
|
|
||||||
|
return subject(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
|
||||||
|
}
|
||||||
|
|
||||||
public static StructuredProperty structuredProperty(
|
public static StructuredProperty structuredProperty(
|
||||||
final String value,
|
final String value,
|
||||||
final String classid,
|
final String classid,
|
||||||
|
@ -200,6 +212,20 @@ public class OafMapperUtils {
|
||||||
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
|
return structuredProperty(value, qualifier(classid, classname, schemeid, schemename), dataInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Subject subject(
|
||||||
|
final String value,
|
||||||
|
final Qualifier qualifier,
|
||||||
|
final DataInfo dataInfo) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final Subject s = new Subject();
|
||||||
|
s.setValue(value);
|
||||||
|
s.setQualifier(qualifier);
|
||||||
|
s.setDataInfo(dataInfo);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
public static StructuredProperty structuredProperty(
|
public static StructuredProperty structuredProperty(
|
||||||
final String value,
|
final String value,
|
||||||
final Qualifier qualifier,
|
final Qualifier qualifier,
|
||||||
|
@ -477,4 +503,15 @@ public class OafMapperUtils {
|
||||||
rel.setProperties(properties);
|
rel.setProperties(properties);
|
||||||
return rel;
|
return rel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getProvenance(DataInfo dataInfo) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(dataInfo)
|
||||||
|
.map(
|
||||||
|
d -> Optional
|
||||||
|
.ofNullable(d.getProvenanceaction())
|
||||||
|
.map(Qualifier::getClassid)
|
||||||
|
.orElse(""))
|
||||||
|
.orElse("");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.getProvenance;
|
||||||
|
import static org.apache.commons.lang3.StringUtils.isBlank;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Subject;
|
||||||
|
|
||||||
|
public class SubjectProvenanceComparator implements Comparator<Subject> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(Subject left, Subject right) {
|
||||||
|
|
||||||
|
String lProv = getProvenance(left.getDataInfo());
|
||||||
|
String rProv = getProvenance(right.getDataInfo());
|
||||||
|
|
||||||
|
if (isBlank(lProv) && isBlank(rProv))
|
||||||
|
return 0;
|
||||||
|
if (isBlank(lProv))
|
||||||
|
return 1;
|
||||||
|
if (isBlank(rProv))
|
||||||
|
return -1;
|
||||||
|
if (lProv.equals(rProv))
|
||||||
|
return 0;
|
||||||
|
if (lProv.toLowerCase().contains("crosswalk"))
|
||||||
|
return -1;
|
||||||
|
if (rProv.toLowerCase().contains("crosswalk"))
|
||||||
|
return 1;
|
||||||
|
if (lProv.toLowerCase().contains("user"))
|
||||||
|
return -1;
|
||||||
|
if (rProv.toLowerCase().contains("user"))
|
||||||
|
return 1;
|
||||||
|
if (lProv.toLowerCase().contains("propagation"))
|
||||||
|
return -1;
|
||||||
|
if (rProv.toLowerCase().contains("propagation"))
|
||||||
|
return 1;
|
||||||
|
if (lProv.toLowerCase().contains("iis"))
|
||||||
|
return -1;
|
||||||
|
if (rProv.toLowerCase().contains("iis"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Subject;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
public class Constants {
|
public class Constants {
|
||||||
|
@ -58,13 +59,13 @@ public class Constants {
|
||||||
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static StructuredProperty getSubject(String sbj, String classid, String classname,
|
public static Subject getSubject(String sbj, String classid, String classname,
|
||||||
String diqualifierclassid) {
|
String diqualifierclassid) {
|
||||||
if (sbj.equals(NULL))
|
if (sbj.equals(NULL))
|
||||||
return null;
|
return null;
|
||||||
StructuredProperty sp = new StructuredProperty();
|
Subject s = new Subject();
|
||||||
sp.setValue(sbj);
|
s.setValue(sbj);
|
||||||
sp
|
s
|
||||||
.setQualifier(
|
.setQualifier(
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.qualifier(
|
.qualifier(
|
||||||
|
@ -72,7 +73,7 @@ public class Constants {
|
||||||
classname,
|
classname,
|
||||||
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES,
|
||||||
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES));
|
||||||
sp
|
s
|
||||||
.setDataInfo(
|
.setDataInfo(
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.dataInfo(
|
.dataInfo(
|
||||||
|
@ -88,7 +89,7 @@ public class Constants {
|
||||||
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
ModelConstants.DNET_PROVENANCE_ACTIONS),
|
||||||
""));
|
""));
|
||||||
|
|
||||||
return sp;
|
return s;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Subject;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
@ -79,7 +80,7 @@ public class PrepareFOSSparkJob implements Serializable {
|
||||||
HashSet<String> level3 = new HashSet<>();
|
HashSet<String> level3 = new HashSet<>();
|
||||||
addLevels(level1, level2, level3, first);
|
addLevels(level1, level2, level3, first);
|
||||||
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
|
it.forEachRemaining(v -> addLevels(level1, level2, level3, v));
|
||||||
List<StructuredProperty> sbjs = new ArrayList<>();
|
List<Subject> sbjs = new ArrayList<>();
|
||||||
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
level1.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
||||||
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
level2.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
||||||
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
level3.forEach(l -> sbjs.add(getSubject(l, FOS_CLASS_ID, FOS_CLASS_NAME, UPDATE_SUBJECT_FOS_CLASS_ID)));
|
||||||
|
|
|
@ -24,6 +24,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Subject;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
@ -73,7 +74,7 @@ public class PrepareSDGSparkJob implements Serializable {
|
||||||
Result r = new Result();
|
Result r = new Result();
|
||||||
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
|
r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI));
|
||||||
SDGDataModel first = it.next();
|
SDGDataModel first = it.next();
|
||||||
List<StructuredProperty> sbjs = new ArrayList<>();
|
List<Subject> sbjs = new ArrayList<>();
|
||||||
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
|
sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID));
|
||||||
it
|
it
|
||||||
.forEachRemaining(
|
.forEachRemaining(
|
||||||
|
|
|
@ -19,7 +19,7 @@ import java.time.chrono.ThaiBuddhistDate
|
||||||
import java.time.format.DateTimeFormatter
|
import java.time.format.DateTimeFormatter
|
||||||
import java.util.{Date, Locale}
|
import java.util.{Date, Locale}
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.io.{Codec, Source}
|
import scala.io.Source
|
||||||
|
|
||||||
object DataciteToOAFTransformation {
|
object DataciteToOAFTransformation {
|
||||||
|
|
||||||
|
@ -252,7 +252,7 @@ object DataciteToOAFTransformation {
|
||||||
.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue))
|
.exists(i => i.getHostedby != null && "figshare".equalsIgnoreCase(i.getHostedby.getValue))
|
||||||
if (hosted_by_figshare) {
|
if (hosted_by_figshare) {
|
||||||
r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
|
r.getInstance().asScala.foreach(i => i.setAccessright(ModelConstants.OPEN_ACCESS_RIGHT()))
|
||||||
val l: List[StructuredProperty] = List()
|
val l: List[Subject] = List()
|
||||||
r.setSubject(l.asJava)
|
r.setSubject(l.asJava)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -492,7 +492,7 @@ object DataciteToOAFTransformation {
|
||||||
subjects
|
subjects
|
||||||
.filter(s => s.subject.nonEmpty)
|
.filter(s => s.subject.nonEmpty)
|
||||||
.map(s =>
|
.map(s =>
|
||||||
OafMapperUtils.structuredProperty(
|
OafMapperUtils.subject(
|
||||||
s.subject.get,
|
s.subject.get,
|
||||||
SUBJ_CLASS,
|
SUBJ_CLASS,
|
||||||
SUBJ_CLASS,
|
SUBJ_CLASS,
|
||||||
|
|
|
@ -281,7 +281,7 @@ object BioDBToOAF {
|
||||||
d.setSubject(
|
d.setSubject(
|
||||||
subjects
|
subjects
|
||||||
.map(s =>
|
.map(s =>
|
||||||
OafMapperUtils.structuredProperty(
|
OafMapperUtils.subject(
|
||||||
s,
|
s,
|
||||||
SUBJ_CLASS,
|
SUBJ_CLASS,
|
||||||
SUBJ_CLASS,
|
SUBJ_CLASS,
|
||||||
|
|
|
@ -265,8 +265,8 @@ object PubMedToOaf {
|
||||||
result.setLanguage(term)
|
result.setLanguage(term)
|
||||||
}
|
}
|
||||||
|
|
||||||
val subjects: List[StructuredProperty] = article.getSubjects.asScala.map(s =>
|
val subjects: List[Subject] = article.getSubjects.asScala.map(s =>
|
||||||
OafMapperUtils.structuredProperty(
|
OafMapperUtils.subject(
|
||||||
s.getValue,
|
s.getValue,
|
||||||
SUBJ_CLASS,
|
SUBJ_CLASS,
|
||||||
SUBJ_CLASS,
|
SUBJ_CLASS,
|
||||||
|
|
|
@ -72,7 +72,7 @@ public class ProduceTest {
|
||||||
|
|
||||||
JavaRDD<Result> tmp = getResultJavaRDD();
|
JavaRDD<Result> tmp = getResultJavaRDD();
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<Subject> sbjs = tmp
|
||||||
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -169,7 +169,7 @@ public class ProduceTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<Subject> sbjs = tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -396,7 +396,7 @@ public class ProduceTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<Subject> sbjs = tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -508,7 +508,7 @@ public class ProduceTest {
|
||||||
.getSubject()
|
.getSubject()
|
||||||
.size());
|
.size());
|
||||||
|
|
||||||
List<StructuredProperty> sbjs = tmp
|
List<Subject> sbjs = tmp
|
||||||
.filter(row -> row.getId().equals(doi))
|
.filter(row -> row.getId().equals(doi))
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -537,7 +537,7 @@ public class ProduceTest {
|
||||||
|
|
||||||
JavaRDD<Result> tmp = getResultJavaRDDPlusSDG();
|
JavaRDD<Result> tmp = getResultJavaRDDPlusSDG();
|
||||||
|
|
||||||
List<StructuredProperty> sbjs_sdg = tmp
|
List<Subject> sbjs_sdg = tmp
|
||||||
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
.filter(row -> row.getSubject() != null && row.getSubject().size() > 0)
|
||||||
.flatMap(row -> row.getSubject().iterator())
|
.flatMap(row -> row.getSubject().iterator())
|
||||||
.filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID))
|
.filter(sbj -> sbj.getQualifier().getClassid().equals(Constants.SDG_CLASS_ID))
|
||||||
|
|
|
@ -26,20 +26,7 @@ import eu.dnetlib.broker.objects.OaBrokerRelatedPublication;
|
||||||
import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware;
|
import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware;
|
||||||
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
|
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
|
|
||||||
public class ConversionUtils {
|
public class ConversionUtils {
|
||||||
|
|
||||||
|
@ -71,6 +58,10 @@ public class ConversionUtils {
|
||||||
return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
|
return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static OaBrokerTypedValue oafSubjectToBrokerTypedValue(final Subject sp) {
|
||||||
|
return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
|
||||||
|
}
|
||||||
|
|
||||||
public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
|
public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) {
|
||||||
if (d == null) {
|
if (d == null) {
|
||||||
return null;
|
return null;
|
||||||
|
@ -115,7 +106,7 @@ public class ConversionUtils {
|
||||||
res.setTitles(structPropList(result.getTitle()));
|
res.setTitles(structPropList(result.getTitle()));
|
||||||
res.setAbstracts(fieldList(result.getDescription()));
|
res.setAbstracts(fieldList(result.getDescription()));
|
||||||
res.setLanguage(classId(result.getLanguage()));
|
res.setLanguage(classId(result.getLanguage()));
|
||||||
res.setSubjects(structPropTypedList(result.getSubject()));
|
res.setSubjects(subjectList(result.getSubject()));
|
||||||
res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
|
res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor));
|
||||||
res.setPublicationdate(fieldValue(result.getDateofacceptance()));
|
res.setPublicationdate(fieldValue(result.getDateofacceptance()));
|
||||||
res.setPublisher(fieldValue(result.getPublisher()));
|
res.setPublisher(fieldValue(result.getPublisher()));
|
||||||
|
@ -304,6 +295,18 @@ public class ConversionUtils {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static List<OaBrokerTypedValue> subjectList(final List<Subject> list) {
|
||||||
|
if (list == null) {
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
return list
|
||||||
|
.stream()
|
||||||
|
.map(ConversionUtils::oafSubjectToBrokerTypedValue)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) {
|
private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) {
|
||||||
if (list == null) {
|
if (list == null) {
|
||||||
return new ArrayList<>();
|
return new ArrayList<>();
|
||||||
|
|
|
@ -391,6 +391,28 @@ object DoiBoostMappingUtil {
|
||||||
di
|
di
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Creates a Subject whose qualifier uses `classId` as both class id and
  * class name, and `schemeId` as both scheme id and scheme name.
  *
  * @param value the subject value
  * @param classId the qualifier class id (also used as class name)
  * @param schemeId the qualifier scheme id (also used as scheme name)
  * @return the new Subject
  */
def createSubject(value: String, classId: String, schemeId: String): Subject =
  createSubject(value, classId, classId, schemeId, schemeId)
|
||||||
|
|
||||||
|
/** Creates a Subject with the given value and a qualifier built from the
  * given class and scheme identifiers and names.
  *
  * @param value the subject value
  * @param classId the qualifier class id
  * @param className the qualifier class name
  * @param schemeId the qualifier scheme id
  * @param schemeName the qualifier scheme name
  * @return the new Subject
  */
def createSubject(
  value: String,
  classId: String,
  className: String,
  schemeId: String,
  schemeName: String
): Subject = {
  val subject = new Subject
  val qualifier = OafMapperUtils.qualifier(classId, className, schemeId, schemeName)
  subject.setQualifier(qualifier)
  subject.setValue(value)
  subject
}
|
||||||
|
|
||||||
def createSP(
|
def createSP(
|
||||||
value: String,
|
value: String,
|
||||||
classId: String,
|
classId: String,
|
||||||
|
|
|
@ -201,7 +201,7 @@ case object Crossref2Oaf {
|
||||||
|
|
||||||
if (subjectList.nonEmpty) {
|
if (subjectList.nonEmpty) {
|
||||||
result.setSubject(
|
result.setSubject(
|
||||||
subjectList.map(s => createSP(s, "keywords", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
|
subjectList.map(s => createSubject(s, "keyword", ModelConstants.DNET_SUBJECT_TYPOLOGIES)).asJava
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ package eu.dnetlib.doiboost.mag
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants
|
import eu.dnetlib.dhp.schema.common.ModelConstants
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory
|
||||||
import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty}
|
import eu.dnetlib.dhp.schema.oaf.{Instance, Journal, Publication, StructuredProperty, Subject}
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil
|
||||||
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
import eu.dnetlib.doiboost.DoiBoostMappingUtil._
|
||||||
import org.json4s
|
import org.json4s
|
||||||
|
@ -210,8 +210,8 @@ case object ConversionUtil {
|
||||||
val className = "Microsoft Academic Graph classification"
|
val className = "Microsoft Academic Graph classification"
|
||||||
val classid = "MAG"
|
val classid = "MAG"
|
||||||
|
|
||||||
val p: List[StructuredProperty] = fieldOfStudy.subjects.flatMap(s => {
|
val p: List[Subject] = fieldOfStudy.subjects.flatMap(s => {
|
||||||
val s1 = createSP(
|
val s1 = createSubject(
|
||||||
s.DisplayName,
|
s.DisplayName,
|
||||||
classid,
|
classid,
|
||||||
className,
|
className,
|
||||||
|
@ -219,10 +219,10 @@ case object ConversionUtil {
|
||||||
ModelConstants.DNET_SUBJECT_TYPOLOGIES
|
ModelConstants.DNET_SUBJECT_TYPOLOGIES
|
||||||
)
|
)
|
||||||
val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString)
|
val di = DoiBoostMappingUtil.generateDataInfo(s.Score.toString)
|
||||||
var resList: List[StructuredProperty] = List(s1)
|
var resList: List[Subject] = List(s1)
|
||||||
if (s.MainType.isDefined) {
|
if (s.MainType.isDefined) {
|
||||||
val maintp = s.MainType.get
|
val maintp = s.MainType.get
|
||||||
val s2 = createSP(
|
val s2 = createSubject(
|
||||||
s.MainType.get,
|
s.MainType.get,
|
||||||
classid,
|
classid,
|
||||||
className,
|
className,
|
||||||
|
@ -232,7 +232,7 @@ case object ConversionUtil {
|
||||||
s2.setDataInfo(di)
|
s2.setDataInfo(di)
|
||||||
resList = resList ::: List(s2)
|
resList = resList ::: List(s2)
|
||||||
if (maintp.contains(".")) {
|
if (maintp.contains(".")) {
|
||||||
val s3 = createSP(
|
val s3 = createSubject(
|
||||||
maintp.split("\\.").head,
|
maintp.split("\\.").head,
|
||||||
classid,
|
classid,
|
||||||
className,
|
className,
|
||||||
|
|
|
@ -290,7 +290,7 @@ public class EOSCTagJobTest {
|
||||||
.stream()
|
.stream()
|
||||||
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
.anyMatch(s -> s.getCode().equals("EOSC::Jupyter Notebook")));
|
||||||
|
|
||||||
List<StructuredProperty> subjects = tmp
|
List<Subject> subjects = tmp
|
||||||
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
.filter(sw -> sw.getId().equals("50|od______1582::6e7a9b21a2feef45673890432af34244"))
|
||||||
.collect()
|
.collect()
|
||||||
.get(0)
|
.get(0)
|
||||||
|
|
|
@ -3,16 +3,15 @@ package eu.dnetlib.dhp.oa.graph.clean;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.SerializationUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableConsumer;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.oaf.AccessRight;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Country;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
|
|
||||||
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Object>> implements Serializable {
|
||||||
|
|
||||||
|
@ -27,9 +26,36 @@ public class CleaningRuleMap extends HashMap<Class<?>, SerializableConsumer<Obje
|
||||||
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
|
mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o));
|
||||||
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
|
mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o));
|
||||||
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
|
mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o));
|
||||||
|
mapping.put(Subject.class, o -> cleanSubject(vocabularies, (Subject) o));
|
||||||
return mapping;
|
return mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void cleanSubject(VocabularyGroup vocabularies, Subject subject) {
|
||||||
|
if (cleanSubjectForVocabulary(ModelConstants.DNET_SUBJECT_FOS_CLASSID, vocabularies, subject)) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
// TODO cleaning based on different subject vocabs can be added here
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean cleanSubjectForVocabulary(String vocabularyId, VocabularyGroup vocabularies,
|
||||||
|
Subject subject) {
|
||||||
|
AtomicReference<Boolean> modified = new AtomicReference<>(false);
|
||||||
|
vocabularies.find(vocabularyId).ifPresent(vocabulary -> {
|
||||||
|
if (!ModelConstants.DNET_SUBJECT_KEYWORD.equalsIgnoreCase(subject.getQualifier().getClassid())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Qualifier newValue = vocabulary.lookup(subject.getValue());
|
||||||
|
if (!ModelConstants.UNKNOWN.equals(newValue.getClassid())) {
|
||||||
|
subject.setValue(newValue.getClassid());
|
||||||
|
subject.getQualifier().setClassid(vocabularyId);
|
||||||
|
subject.getQualifier().setClassname(vocabulary.getName());
|
||||||
|
modified.set(true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return modified.get();
|
||||||
|
}
|
||||||
|
|
||||||
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
private static void cleanRelation(VocabularyGroup vocabularies, Relation r) {
|
||||||
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
|
if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) {
|
||||||
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());
|
Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType());
|
||||||
|
|
|
@ -8,15 +8,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.keyValue;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.oaiIProvenance;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
@ -29,26 +21,7 @@ import com.google.common.collect.Sets;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.AccessRight;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Context;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Field;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.GeoLocation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Instance;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Journal;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OAIProvenance;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
|
@ -411,7 +384,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected abstract List<StructuredProperty> prepareTitles(Document doc, DataInfo info);
|
protected abstract List<StructuredProperty> prepareTitles(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract List<StructuredProperty> prepareSubjects(Document doc, DataInfo info);
|
protected abstract List<Subject> prepareSubjects(Document doc, DataInfo info);
|
||||||
|
|
||||||
protected abstract Qualifier prepareLanguages(Document doc);
|
protected abstract Qualifier prepareLanguages(Document doc);
|
||||||
|
|
||||||
|
@ -559,6 +532,22 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected List<Subject> prepareSubjectList(
|
||||||
|
final Node node,
|
||||||
|
final String xpath,
|
||||||
|
final DataInfo info) {
|
||||||
|
final List<Subject> res = new ArrayList<>();
|
||||||
|
for (final Object o : node.selectNodes(xpath)) {
|
||||||
|
final Node n = (Node) o;
|
||||||
|
res
|
||||||
|
.add(
|
||||||
|
subject(
|
||||||
|
n.getText(), n.valueOf("@classid"), n.valueOf("@classname"), n.valueOf("@schemeid"),
|
||||||
|
n.valueOf("@schemename"), info));
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
protected OAIProvenance prepareOAIprovenance(final Document doc) {
|
||||||
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
final Node n = doc.selectSingleNode("//*[local-name()='provenance']/*[local-name()='originDescription']");
|
||||||
|
|
||||||
|
|
|
@ -84,8 +84,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
|
protected List<Subject> prepareSubjects(final Document doc, final DataInfo info) {
|
||||||
return prepareListStructProps(doc, "//dc:subject", info);
|
return prepareSubjectList(doc, "//dc:subject", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -257,8 +257,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
|
protected List<Subject> prepareSubjects(final Document doc, final DataInfo info) {
|
||||||
return prepareListStructProps(doc, "//*[local-name()='subject']", info);
|
return prepareSubjectList(doc, "//*[local-name()='subject']", info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -251,6 +251,33 @@ public class GraphCleaningFunctionsTest {
|
||||||
pid.getQualifier().getClassname()));
|
pid.getQualifier().getClassname()));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
assertNotNull(p_cleaned.getSubject());
|
||||||
|
|
||||||
|
List<Subject> fos_subjects = p_cleaned
|
||||||
|
.getSubject()
|
||||||
|
.stream()
|
||||||
|
.filter(s -> ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
assertNotNull(fos_subjects);
|
||||||
|
assertEquals(2, fos_subjects.size());
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
fos_subjects
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
s -> "0101 mathematics".equals(s.getValue()) &
|
||||||
|
ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid()) &
|
||||||
|
"sysimport:crosswalk:datasetarchive"
|
||||||
|
.equals(s.getDataInfo().getProvenanceaction().getClassid())));
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
fos_subjects
|
||||||
|
.stream()
|
||||||
|
.anyMatch(
|
||||||
|
s -> "0102 computer and information sciences".equals(s.getValue()) &
|
||||||
|
ModelConstants.DNET_SUBJECT_FOS_CLASSID.equals(s.getQualifier().getClassid())));
|
||||||
|
|
||||||
// TODO add more assertions to verify the cleaned values
|
// TODO add more assertions to verify the cleaned values
|
||||||
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
System.out.println(MAPPER.writeValueAsString(p_cleaned));
|
||||||
}
|
}
|
||||||
|
|
|
@ -928,7 +928,8 @@ class MappersTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testROHub2() throws IOException, DocumentException {
|
void testROHub2() throws IOException, DocumentException {
|
||||||
final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub-modified.xml")));
|
final String xml = IOUtils
|
||||||
|
.toString(Objects.requireNonNull(getClass().getResourceAsStream("rohub-modified.xml")));
|
||||||
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
|
||||||
System.out.println("***************");
|
System.out.println("***************");
|
||||||
System.out.println(new ObjectMapper().writeValueAsString(list));
|
System.out.println(new ObjectMapper().writeValueAsString(list));
|
||||||
|
|
|
@ -743,12 +743,12 @@
|
||||||
"trust": "0.9"
|
"trust": "0.9"
|
||||||
},
|
},
|
||||||
"qualifier": {
|
"qualifier": {
|
||||||
"classid": "",
|
"classid": "keyword",
|
||||||
"classname": "",
|
"classname": "keyword",
|
||||||
"schemeid": "",
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
"schemename": ""
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
},
|
},
|
||||||
"value": "infrared detectors"
|
"value": "FOS: Mathematics"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
|
@ -765,12 +765,12 @@
|
||||||
"trust": "0.9"
|
"trust": "0.9"
|
||||||
},
|
},
|
||||||
"qualifier": {
|
"qualifier": {
|
||||||
"classid": "",
|
"classid": "keyword",
|
||||||
"classname": "",
|
"classname": "keyword",
|
||||||
"schemeid": "",
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
"schemename": ""
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
},
|
},
|
||||||
"value": "lens antennas"
|
"value": "FOS: Computer and information sciences"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
|
@ -787,12 +787,34 @@
|
||||||
"trust": "0.9"
|
"trust": "0.9"
|
||||||
},
|
},
|
||||||
"qualifier": {
|
"qualifier": {
|
||||||
"classid": "",
|
"classid": "keyword",
|
||||||
"classname": "",
|
"classname": "keyword",
|
||||||
"schemeid": "",
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
"schemename": ""
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
},
|
},
|
||||||
"value": "silicon"
|
"value": "0101 mathematics"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"dataInfo": {
|
||||||
|
"deletedbyinference": false,
|
||||||
|
"inferenceprovenance": "",
|
||||||
|
"inferred": false,
|
||||||
|
"invisible": false,
|
||||||
|
"provenanceaction": {
|
||||||
|
"classid": "subject:fos",
|
||||||
|
"classname": "subject:fos",
|
||||||
|
"schemeid": "dnet:provenanceActions",
|
||||||
|
"schemename": "dnet:provenanceActions"
|
||||||
|
},
|
||||||
|
"trust": "0.9"
|
||||||
|
},
|
||||||
|
"qualifier": {
|
||||||
|
"classid": "keyword",
|
||||||
|
"classname": "keyword",
|
||||||
|
"schemeid": "dnet:subject_classification_typologies",
|
||||||
|
"schemename": "dnet:subject_classification_typologies"
|
||||||
|
},
|
||||||
|
"value": "0101 mathematics"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"dataInfo": {
|
"dataInfo": {
|
||||||
|
|
|
@ -1244,3 +1244,5 @@ dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy
|
||||||
dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo
|
dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo
|
||||||
dnet:relation_subRelType @=@ relationship @=@ publicationDataset
|
dnet:relation_subRelType @=@ relationship @=@ publicationDataset
|
||||||
dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned
|
dnet:provenanceActions @=@ iis @=@ erroneous label to be cleaned
|
||||||
|
FOS @=@ 0101 mathematics @=@ FOS: Mathematics
|
||||||
|
FOS @=@ 0102 computer and information sciences @=@ FOS: Computer and information sciences
|
|
@ -1119,3 +1119,6 @@ dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement
|
||||||
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version
|
dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version
|
||||||
|
FOS @=@ Fields of Science and Technology classification @=@ 0101 mathematics @=@ 0101 mathematics
|
||||||
|
FOS @=@ Fields of Science and Technology classification @=@ 0102 computer and information sciences @=@ 0102 computer and information sciences
|
||||||
|
FOS @=@ Fields of Science and Technology classification @=@ 0103 physical sciences @=@ 0103 physical sciences
|
|
@ -59,7 +59,7 @@ class ResolveEntitiesTest extends Serializable {
|
||||||
r.setId(id.toLowerCase.trim)
|
r.setId(id.toLowerCase.trim)
|
||||||
r.setSubject(
|
r.setSubject(
|
||||||
List(
|
List(
|
||||||
OafMapperUtils.structuredProperty(
|
OafMapperUtils.subject(
|
||||||
FAKE_SUBJECT,
|
FAKE_SUBJECT,
|
||||||
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
|
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
|
||||||
null
|
null
|
||||||
|
@ -250,7 +250,7 @@ class ResolveEntitiesTest extends Serializable {
|
||||||
val r = new Result
|
val r = new Result
|
||||||
r.setSubject(
|
r.setSubject(
|
||||||
List(
|
List(
|
||||||
OafMapperUtils.structuredProperty(
|
OafMapperUtils.subject(
|
||||||
FAKE_SUBJECT,
|
FAKE_SUBJECT,
|
||||||
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
|
OafMapperUtils.qualifier("fos", "fosCS", "fossSchema", "fossiFIgo"),
|
||||||
null
|
null
|
||||||
|
|
|
@ -95,7 +95,6 @@ public class IndexRecordTransformerTest {
|
||||||
testRecordTransformation(record);
|
testRecordTransformation(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException {
|
public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException {
|
||||||
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml"));
|
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml"));
|
||||||
|
@ -129,8 +128,6 @@ public class IndexRecordTransformerTest {
|
||||||
testRecordTransformation(record);
|
testRecordTransformation(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testDoiUrlNormalization() throws MalformedURLException {
|
void testDoiUrlNormalization() throws MalformedURLException {
|
||||||
|
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -801,7 +801,7 @@
|
||||||
<mockito-core.version>3.3.3</mockito-core.version>
|
<mockito-core.version>3.3.3</mockito-core.version>
|
||||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||||
<vtd.version>[2.12,3.0)</vtd.version>
|
<vtd.version>[2.12,3.0)</vtd.version>
|
||||||
<dhp-schemas.version>[2.12.2-SNAPSHOT]</dhp-schemas.version>
|
<dhp-schemas.version>[3.14.0]</dhp-schemas.version>
|
||||||
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
|
||||||
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
|
||||||
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
|
||||||
|
|
Loading…
Reference in New Issue