forked from D-Net/dnet-hadoop
[EOSC TAG] added code to remove the "EOSC Jupyter Notebook" subject and to set "EOSC" as the classid in the qualifier
This commit is contained in:
parent 22f65680b9
commit e4eac1d20b

@@ -30,7 +30,7 @@ public class SparkEoscTag {
 	public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 	public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
 		.qualifier(
-			"eosc",
+			"EOSC",
 			"European Open Science Cloud",
 			ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
 	public static final DataInfo EOSC_DATAINFO = OafMapperUtils
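With this change the qualifier carries "EOSC" as its classid, as the commit message describes. For reference, the resulting constant reads as below; this is a sketch reassembled from the hunk above, and the role comments assume OafMapperUtils.qualifier takes (classid, classname, schemeid, schemename) in that order:

	// Sketch of EOSC_QUALIFIER after this commit, reassembled from the hunk above;
	// the parameter roles in the comments are assumptions about the qualifier() signature.
	public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
		.qualifier(
			"EOSC", // classid, previously the lowercase "eosc"
			"European Open Science Cloud", // classname
			ModelConstants.DNET_SUBJECT_TYPOLOGIES, // schemeid
			ModelConstants.DNET_SUBJECT_TYPOLOGIES); // schemename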

@@ -95,7 +95,14 @@ public class SparkEoscTag {
 
 		if (containsCriteriaNotebook(s)) {
 			sbject.add(EOSC_NOTEBOOK);
+			if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))){
+				sbject = sbject.stream().map(sb -> {
+					if (sb.getValue().equals("EOSC Jupyter Notebook")){
+						return null;
+					}
+					return sb;
+				}).filter(Objects::nonNull).collect(Collectors.toList());
+			}
 		}
 		if (containsCriteriaGalaxy(s)) {
 			sbject.add(EOSC_GALAXY);
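The added block rebuilds the subject list without the "EOSC Jupyter Notebook" entries, as announced in the commit message. Where the list is known to be mutable, the same cleanup can be written more compactly; a minimal sketch, assuming sbject is a modifiable java.util.List whose elements expose getValue():

	// Minimal alternative sketch, not the committed code: drop the entry in place.
	// removeIf throws UnsupportedOperationException on an unmodifiable list, which is
	// one reason the committed version builds a fresh list via the stream pipeline.
	sbject.removeIf(sb -> "EOSC Jupyter Notebook".equals(sb.getValue()));

Rebuilding the list, as the committed code does, sidesteps that restriction at the cost of an extra pass.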

@@ -103,21 +103,19 @@ object SparkConvertRDDtoDataset {
 			"IsAmongTopNSimilarDocuments"
 		)
 
 
 		val rddRelation = spark.sparkContext
 			.textFile(s"$sourcePath/relation")
 			.map(s => mapper.readValue(s, classOf[Relation]))
 			.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
 			.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
 			//filter OpenCitations relations
-			.filter(r => r.getCollectedfrom!= null && r.getCollectedfrom.size()>0 && !r.getCollectedfrom.asScala.exists(k => "opencitations".equalsIgnoreCase(k.getValue)))
+			.filter(r =>
+				r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
+					"opencitations".equalsIgnoreCase(k.getValue)
+				)
+			)
 			.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
 		spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
-
-
-
-
-
-
 	}
 }