Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into beta

This commit is contained in:
Sandro La Bruzzo 2022-05-16 10:30:35 +02:00
commit c1971d52c4
3 changed files with 15 additions and 10 deletions

View File

@ -30,7 +30,7 @@ public class SparkEoscTag {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static final Qualifier EOSC_QUALIFIER = OafMapperUtils public static final Qualifier EOSC_QUALIFIER = OafMapperUtils
.qualifier( .qualifier(
"eosc", "EOSC",
"European Open Science Cloud", "European Open Science Cloud",
ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES); ModelConstants.DNET_SUBJECT_TYPOLOGIES, ModelConstants.DNET_SUBJECT_TYPOLOGIES);
public static final DataInfo EOSC_DATAINFO = OafMapperUtils public static final DataInfo EOSC_DATAINFO = OafMapperUtils
@ -95,7 +95,14 @@ public class SparkEoscTag {
if (containsCriteriaNotebook(s)) { if (containsCriteriaNotebook(s)) {
sbject.add(EOSC_NOTEBOOK); sbject.add(EOSC_NOTEBOOK);
if (sbject.stream().anyMatch(sb -> sb.getValue().equals("EOSC Jupyter Notebook"))){
sbject = sbject.stream().map(sb -> {
if (sb.getValue().equals("EOSC Jupyter Notebook")){
return null;
}
return sb;
}).filter(Objects::nonNull).collect(Collectors.toList());
}
} }
if (containsCriteriaGalaxy(s)) { if (containsCriteriaGalaxy(s)) {
sbject.add(EOSC_GALAXY); sbject.add(EOSC_GALAXY);

View File

@ -10,4 +10,4 @@ SELECT
'OpenOrgs Database' AS collectedfromname, 'OpenOrgs Database' AS collectedfromname,
'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction
FROM relationships FROM relationships
WHERE reltype = 'Child' OR reltype = 'Parent' WHERE reltype = 'IsChildOf' OR reltype = 'IsParentOf'

View File

@ -103,21 +103,19 @@ object SparkConvertRDDtoDataset {
"IsAmongTopNSimilarDocuments" "IsAmongTopNSimilarDocuments"
) )
val rddRelation = spark.sparkContext val rddRelation = spark.sparkContext
.textFile(s"$sourcePath/relation") .textFile(s"$sourcePath/relation")
.map(s => mapper.readValue(s, classOf[Relation])) .map(s => mapper.readValue(s, classOf[Relation]))
.filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false) .filter(r => r.getDataInfo != null && r.getDataInfo.getDeletedbyinference == false)
.filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50")) .filter(r => r.getSource.startsWith("50") && r.getTarget.startsWith("50"))
//filter OpenCitations relations //filter OpenCitations relations
.filter(r => r.getCollectedfrom!= null && r.getCollectedfrom.size()>0 && !r.getCollectedfrom.asScala.exists(k => "opencitations".equalsIgnoreCase(k.getValue))) .filter(r =>
r.getCollectedfrom != null && r.getCollectedfrom.size() > 0 && !r.getCollectedfrom.asScala.exists(k =>
"opencitations".equalsIgnoreCase(k.getValue)
)
)
.filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass))) .filter(r => !relationSemanticFilter.exists(k => k.equalsIgnoreCase(r.getRelClass)))
spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath") spark.createDataset(rddRelation).as[Relation].write.mode(SaveMode.Overwrite).save(s"$relPath")
} }
} }