[SKG-IFv1.1] Relaxing the constraint to also include EDP and SH results

This commit is contained in:
Miriam Baglioni 2024-09-22 18:03:09 +02:00
parent fe71931002
commit 09e2ac1fce
1 changed file with 6 additions and 4 deletions

View File

@ -98,8 +98,9 @@ public class SelectLOT1Entities implements Serializable {
.schema(Encoders.bean(Software.class).schema()) .schema(Encoders.bean(Software.class).schema())
.json(inputPath + "software") .json(inputPath + "software")
.where("datainfo.deletedbyinference != true AND datainfo.invisible != true") .where("datainfo.deletedbyinference != true AND datainfo.invisible != true")
.selectExpr("id", "instance", "explode(pid) as pid") .select("id", "instance", "pid")
.where("pid.qualifier.classid IN ('doi', 'swhid')") // filter by pid type .where("array_contains(instance.hostedby.key ,'10|re3data_____::c4b2081b224be6b3e79d0e5e5556f631') OR array_contains(instance.hostedby.key, '10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4') " +
"OR array_contains(pid.qualifier.classid,'doi') OR array_contains(pid.qualifier.classid,'swhid')") // filter by pid type and hosting datasource
.selectExpr("id", "explode(instance) as instance") .selectExpr("id", "explode(instance) as instance")
.withColumn( .withColumn(
"CCL", "CCL",
@ -119,8 +120,9 @@ public class SelectLOT1Entities implements Serializable {
.schema(Encoders.bean(Dataset.class).schema()) .schema(Encoders.bean(Dataset.class).schema())
.json(inputPath + "dataset") .json(inputPath + "dataset")
.where("datainfo.deletedbyinference != true AND datainfo.invisible != true") .where("datainfo.deletedbyinference != true AND datainfo.invisible != true")
.selectExpr("id", "instance", "explode(pid) as pid") .select("id", "instance", "pid")
.where("pid.qualifier.classid IN ('doi', 'handle', 'pdb', 'ena', 'uniprot')") // filter by pid type .where("array_contains(pid.qualifier.classid ,'doi') OR array_contains(pid.qualifier.classid ,'handle') OR array_contains(pid.qualifier.classid , 'pdb') array_contains(pid.qualifier.classid , 'ena') array_contains(pid.qualifier.classid , 'uniprot') OR " +
"array_contains(instance.hostedby.key ,'10|re3data_____::c4b2081b224be6b3e79d0e5e5556f631') OR array_contains(instance.hostedby.key, '10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4') )") // filter by pid type
.selectExpr("id", "explode(instance) as instance") .selectExpr("id", "explode(instance) as instance")
.withColumn( .withColumn(
"CCL", "CCL",