[EOSC TAG] added code to remove EOSC Jupyter Notebook from subjects and put EOSC as classid in the qualifier

This commit is contained in:
Miriam Baglioni 2022-05-13 11:01:33 +02:00
parent 22f65680b9
commit e4eac1d20b
2 changed files with 14 additions and 9 deletions

View File

@ -30,7 +30,7 @@ public class SparkEoscTag {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
.qualifier(
"eosc",
"EOSC",
"European Open Science Cloud",
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
@ -95,7 +95,14 @@ public class SparkEoscTag {
if (containsCriteriaNotebook(s)) {
sbject.add(EOSC_NOTEBOOK);
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))){
sbject = sbject.stream().map(sb -> {
if (sb.getValue().equals("EOSC Jupyter Notebook")){
return null;
}
return sb;
}).filter(Objects::nonNull).collect(Collectors.toList());
}
}
if (containsCriteriaGalaxy(s)) {
sbject.add(EOSC_GALAXY);

View File

@ -103,21 +103,19 @@ object SparkConvertRDDtoDataset {
"IsAmongTopNSimilarDocuments"
)
val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
//filter OpenCitations relations
.filter(r => r.getCollectedfrom!= null && r.getCollectedfrom.size()>0 && !r.getCollectedfrom.asScala.exists(k => "opencitations".equalsIgnoreCase(k.getValue)))
.filter(r =>
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
"opencitations".equalsIgnoreCase(k.getValue)
)
)
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
}
}