diff --git a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java
index b85271d..9e122f2 100644
--- a/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java
+++ b/dump/src/main/java/eu/dnetlib/dhp/oa/graph/dump/filterentities/SelectLOT1Entities.java
@@ -98,8 +98,9 @@ public class SelectLOT1Entities implements Serializable {
 			.schema(Encoders.bean(Software.class).schema())
 			.json(inputPath + "software")
 			.where("datainfo.deletedbyinference != true AND datainfo.invisible != true")
-			.selectExpr("id", "instance", "explode(pid) as pid")
-			.where("pid.qualifier.classid IN ('doi', 'swhid')") // filter by pid type
+			.select("id", "instance", "pid")
+			.where("array_contains(instance.hostedby.key ,'10|re3data_____::c4b2081b224be6b3e79d0e5e5556f631') OR array_contains(instance.hostedby.key, '10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4') " +
+				"OR array_contains(pid.qualifier.classid,'doi') OR array_contains(pid.qualifier.classid,'swhid')") // filter by pid type and hosting datasource
 			.selectExpr("id", "explode(instance) as instance")
 			.withColumn(
 				"CCL",
@@ -119,8 +120,9 @@ public class SelectLOT1Entities implements Serializable {
 			.schema(Encoders.bean(Dataset.class).schema())
 			.json(inputPath + "dataset")
 			.where("datainfo.deletedbyinference != true AND datainfo.invisible != true")
-			.selectExpr("id", "instance", "explode(pid) as pid")
-			.where("pid.qualifier.classid IN ('doi', 'handle', 'pdb', 'ena', 'uniprot')") // filter by pid type
+			.select("id", "instance", "pid")
+			.where("array_contains(pid.qualifier.classid ,'doi') OR array_contains(pid.qualifier.classid ,'handle') OR array_contains(pid.qualifier.classid , 'pdb') OR array_contains(pid.qualifier.classid , 'ena') OR array_contains(pid.qualifier.classid , 'uniprot') OR " +
+				"array_contains(instance.hostedby.key ,'10|re3data_____::c4b2081b224be6b3e79d0e5e5556f631') OR array_contains(instance.hostedby.key, '10|openaire____::dbfd07503aaa1ed31beed7dec942f3f4')") // filter by pid type and hosting datasource
 			.selectExpr("id", "explode(instance) as instance")
 			.withColumn(
 				"CCL",