forked from D-Net/dnet-hadoop
[EOSC TAG] added code to remove EOSC Jupyter Notebook from subjects and put EOSC as classid in the qualifier
This commit is contained in:
parent
22f65680b9
commit
e4eac1d20b
|
@ -30,7 +30,7 @@ public class SparkEoscTag {
|
|||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
|
||||
.qualifier(
|
||||
"eosc",
|
||||
"EOSC",
|
||||
"European Open Science Cloud",
|
||||
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
|
||||
public static final DataInfo EOSC_DATAINFO = OafMapperUtils
|
||||
|
@ -95,7 +95,14 @@ public class SparkEoscTag {
|
|||
|
||||
if (containsCriteriaNotebook(s)) {
|
||||
sbject.add(EOSC_NOTEBOOK);
|
||||
|
||||
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))){
|
||||
sbject = sbject.stream().map(sb -> {
|
||||
if (sb.getValue().equals("EOSC Jupyter Notebook")){
|
||||
return null;
|
||||
}
|
||||
return sb;
|
||||
}).filter(Objects::nonNull).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
if (containsCriteriaGalaxy(s)) {
|
||||
sbject.add(EOSC_GALAXY);
|
||||
|
|
|
@ -103,21 +103,19 @@ object SparkConvertRDDtoDataset {
|
|||
"IsAmongTopNSimilarDocuments"
|
||||
)
|
||||
|
||||
|
||||
val rddRelation = spark.sparkContext
|
||||
.textFile(s"$sourcePath/relation")
|
||||
.map(s => mapper.readValue(s, classOf[Relation]))
|
||||
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
|
||||
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
|
||||
//filter OpenCitations relations
|
||||
.filter(r => r.getCollectedfrom!= null && r.getCollectedfrom.size()>0 && !r.getCollectedfrom.asScala.exists(k => "opencitations".equalsIgnoreCase(k.getValue)))
|
||||
.filter(r =>
|
||||
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
|
||||
"opencitations".equalsIgnoreCase(k.getValue)
|
||||
)
|
||||
)
|
||||
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
|
||||
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue