From 09e2ac1fce7991fa92101a935024cb570f1e4f06 Mon Sep 17 00:00:00 2001
From: Miriam Baglioni
Date: Sun, 22 Sep 2024 18:03:09 +0200
Subject: [PATCH] [SKG-IFv1.1] Relaxing constraint to include also EDP and SH results

---
Notes (ignored by git am):

Both hunks replace the "explode(pid) + filter on pid type" selection with a
single filter that keeps a result when EITHER one of its pids has an accepted
type OR one of its instances is hosted by one of two hard-coded datasource ids.
Presumably those two ids are the EDP and SH datasources named in the subject;
confirm against the datasource registry.

The dataset filter is a chain of array_contains(...) predicates that must all
be joined with OR, and the expression string must have balanced parentheses,
otherwise Spark cannot parse the condition passed to where(String).

The leading whitespace of the hunk lines below was reconstructed (tabs
assumed); if the context does not match the target file, apply with
--ignore-whitespace.

 .../graph/dump/filterentities/SelectLOT1Entities.java | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java
index b85271d..9e122f2 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java
@@ -98,8 +98,9 @@ public class SelectLOT1Entities implements Serializable {
 			.schema(Encoders.bean(Software.class).schema())
 			.json(inputPath + "software")
 			.where("datainfo.deletedbyinference != true AND datainfo.invisible != true")
-			.selectExpr("id", "instance", "explode(pid) as pid")
-			.where("pid.qualifier.classid IN ('doi', 'swhid')") // filter by pid type
+			.select("id", "instance", "pid")
+			.where("array_contains(instance.hostedby.key ,'10|re3data_____::c4b2081b224be6b3e79d0e5e5556f631') OR array_contains(instance.hostedby.key, '10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4') " +
+				"OR array_contains(pid.qualifier.classid,'doi') OR array_contains(pid.qualifier.classid,'swhid')") // filter by pid type and hosting datasource
 			.selectExpr("id", "explode(instance) as instance")
 			.withColumn(
 				"CCL",
@@ -119,8 +120,9 @@ public class SelectLOT1Entities implements Serializable {
 			.schema(Encoders.bean(Dataset.class).schema())
 			.json(inputPath + "dataset")
 			.where("datainfo.deletedbyinference != true AND datainfo.invisible != true")
-			.selectExpr("id", "instance", "explode(pid) as pid")
-			.where("pid.qualifier.classid IN ('doi', 'handle', 'pdb', 'ena', 'uniprot')") // filter by pid type
+			.select("id", "instance", "pid")
+			.where("array_contains(pid.qualifier.classid ,'doi') OR array_contains(pid.qualifier.classid ,'handle') OR array_contains(pid.qualifier.classid , 'pdb') OR array_contains(pid.qualifier.classid , 'ena') OR array_contains(pid.qualifier.classid , 'uniprot') OR " +
+				"array_contains(instance.hostedby.key ,'10|re3data_____::c4b2081b224be6b3e79d0e5e5556f631') OR array_contains(instance.hostedby.key, '10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4')") // filter by pid type and hosting datasource
 			.selectExpr("id", "explode(instance) as instance")
 			.withColumn(
 				"CCL",