diff --git a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java b/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java index 4a45d234c4..d045042535 100644 --- a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java +++ b/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/BulkTagJobTest.java @@ -1,13 +1,24 @@ package eu.dnetlib.dhp; +import static eu.dnetlib.dhp.community.TagginConstants.ZENODO_COMMUNITY_INDICATOR; + import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Software; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.mortbay.util.IO; @@ -31,7 +42,7 @@ public class BulkTagJobTest { taggingConf = IO.toString( BulkTagJobTest.class.getResourceAsStream( - "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.json")); + "/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml")); } catch (IOException e) { e.printStackTrace(); } @@ -66,7 +77,7 @@ public class BulkTagJobTest { } @Test - public void test1() throws Exception { + public void noUpdatesTest() throws Exception { SparkBulkTagJob2.main( new String[] { "-isTest", @@ -74,7 +85,7 @@ public class BulkTagJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/sample/dataset").getPath(), + getClass().getResource("/eu/dnetlib/dhp/sample/dataset/no_updates").getPath(), "-taggingConf", taggingConf, "-resultTableName", @@ -92,142 +103,700 @@ public class BulkTagJobTest { // "-preparedInfoPath", // getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath() }); - } -} - -/* - - -import eu.dnetlib.dhp.orcidtoresultfromsemrel.OrcidPropagationJobTest; -import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob2; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import org.apache.commons.io.FileUtils; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; - -import static org.apache.spark.sql.functions.desc; - - - - - - @Test - public void test1() throws Exception { - SparkResultToCommunityThroughSemRelJob4.main(new String[]{ - "-isTest", Boolean.TRUE.toString(), - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/sample").getPath(), - "-hive_metastore_uris", "", - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-preparedInfoPath", getClass().getResource("/eu/dnetlib/dhp/resulttocommunityfromsemrel/preparedInfo").getPath() - }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); - JavaRDD tmp = sc.textFile(workingDir.toString() + "/dataset") - .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); Assertions.assertEquals(10, tmp.count()); - org.apache.spark.sql.Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); verificationDataset.createOrReplaceTempView("dataset"); - String query = "select id, MyT.id community " + - "from dataset " + - "lateral view explode(context) c as MyT " + - "lateral view explode(MyT.datainfo) d as MyD " + - "where MyD.inferenceprovenance = 'propagation'"; - - org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(5, resultExplodedProvenance.count()); - - Assertions.assertEquals(0, resultExplodedProvenance.filter("id = '50|dedup_wf_001::2305908abeca9da37eaf3bddcaf81b7b'").count()); - - Assertions.assertEquals(1, resultExplodedProvenance.filter("id = '50|dedup_wf_001::0489ae524201eedaa775da282dce35e7'").count()); - Assertions.assertEquals("dh-ch",resultExplodedProvenance.select("community") - .where(resultExplodedProvenance.col("id").equalTo("50|dedup_wf_001::0489ae524201eedaa775da282dce35e7")) - .collectAsList().get(0).getString(0)); - - Assertions.assertEquals(3, resultExplodedProvenance.filter("id = '50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28'").count()); - List rowList = resultExplodedProvenance.select("community") - .where(resultExplodedProvenance.col("id") - .equalTo("50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28")) - .sort(desc("community")).collectAsList(); - Assertions.assertEquals("mes", rowList.get(0).getString(0)); - Assertions.assertEquals("fam", rowList.get(1).getString(0)); - Assertions.assertEquals("ee", rowList.get(2).getString(0)); - - - Assertions.assertEquals(1, resultExplodedProvenance.filter("id = '50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc'").count()); - Assertions.assertEquals("aginfra", resultExplodedProvenance.select("community") - .where(resultExplodedProvenance.col("id") - .equalTo("50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc")) - .collectAsList().get(0).getString(0)); - - - query = "select id, MyT.id community " + - "from dataset " + - "lateral view explode(context) c as MyT " + - "lateral view explode(MyT.datainfo) d as MyD "; - - org.apache.spark.sql.Dataset resultCommunityId = spark.sql(query); - - Assertions.assertEquals(10, resultCommunityId.count()); - - Assertions.assertEquals(2, resultCommunityId.filter("id = '50|dedup_wf_001::0489ae524201eedaa775da282dce35e7'").count()); - rowList = resultCommunityId.select("community") - .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::0489ae524201eedaa775da282dce35e7")) - .sort(desc("community")) - .collectAsList(); - Assertions.assertEquals("dh-ch", rowList.get(0).getString(0)); - Assertions.assertEquals("beopen", rowList.get(1).getString(0)); - - Assertions.assertEquals(3, resultCommunityId.filter("id = '50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28'").count()); - rowList = resultCommunityId.select("community") - .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::0a60e33b4f0986ebd9819451f2d87a28")) - .sort(desc("community")) - .collectAsList(); - Assertions.assertEquals("mes", rowList.get(0).getString(0)); - Assertions.assertEquals("fam", rowList.get(1).getString(0)); - Assertions.assertEquals("ee", rowList.get(2).getString(0)); - - Assertions.assertEquals(2, resultCommunityId.filter("id = '50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc'").count()); - rowList = resultCommunityId.select("community") - .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::0ae02edb5598a5545d10b107fcf48dcc")) - .sort(desc("community")) - .collectAsList(); - Assertions.assertEquals("beopen", rowList.get(0).getString(0)); - Assertions.assertEquals("aginfra", rowList.get(1).getString(0)); - - Assertions.assertEquals(2, resultCommunityId.filter("id = '50|dedup_wf_001::2305908abeca9da37eaf3bddcaf81b7b'").count()); - rowList = resultCommunityId.select("community") - .where(resultCommunityId.col("id").equalTo("50|dedup_wf_001::2305908abeca9da37eaf3bddcaf81b7b")) - .sort(desc("community")) - .collectAsList(); - Assertions.assertEquals("euromarine", rowList.get(1).getString(0)); - Assertions.assertEquals("ni", rowList.get(0).getString(0)); - - Assertions.assertEquals(1, resultCommunityId.filter("id = '50|doajarticles::8d817039a63710fcf97e30f14662c6c8'").count()); - Assertions.assertEquals("euromarine", resultCommunityId.select("community") - .where(resultCommunityId.col("id") - .equalTo("50|doajarticles::8d817039a63710fcf97e30f14662c6c8")) - .collectAsList().get(0).getString(0)); - + String query = + "select id, MyT.id community " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + Assertions.assertEquals(0, spark.sql(query).count()); } - */ + + @Test + public void bulktagBySubjectNoPreviousContextTest() throws Exception { + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass() + .getResource("/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext") + .getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", + workingDir.toString() + "/dataset", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + Assertions.assertEquals(5, spark.sql(query).count()); + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + Assertions.assertEquals( + 5, idExplodeCommunity.filter("provenance = 'community:subject'").count()); + Assertions.assertEquals( + 5, + idExplodeCommunity.filter("name = 'Bulktagging for Community - Subject'").count()); + + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'covid-19'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'mes'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'fam'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'aginfra'").count()); + + Assertions.assertEquals( + 1, + idExplodeCommunity + .filter("id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529'") + .count()); + Assertions.assertEquals( + 1, + idExplodeCommunity + .filter( + "community = 'covid-19' and id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529'") + .count()); + + Assertions.assertEquals( + 2, + idExplodeCommunity + .filter("id = '50|od______3989::05d8c751462f9bb8d2b06956dfbc5c7b'") + .count()); + Assertions.assertEquals( + 2, + idExplodeCommunity + .filter( + "(community = 'covid-19' or community = 'aginfra') and id = '50|od______3989::05d8c751462f9bb8d2b06956dfbc5c7b'") + .count()); + + Assertions.assertEquals( + 2, + idExplodeCommunity + .filter("id = '50|od______3989::0f89464c4ac4c398fe0c71433b175a62'") + .count()); + Assertions.assertEquals( + 2, + idExplodeCommunity + .filter( + "(community = 'mes' or community = 'fam') and id = '50|od______3989::0f89464c4ac4c398fe0c71433b175a62'") + .count()); + } + + @Test + public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass() + .getResource( + "/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance") + .getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", + workingDir.toString() + "/dataset", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyT.id = 'covid-19' "; + + Assertions.assertEquals(3, spark.sql(query).count()); + + org.apache.spark.sql.Dataset communityContext = spark.sql(query); + + Assertions.assertEquals( + 2, + communityContext + .filter("id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529'") + .count()); + Assertions.assertEquals( + 1, + communityContext + .filter( + "id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529' and provenance = 'community:subject'") + .count()); + Assertions.assertEquals( + 1, + communityContext + .filter( + "id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529' and provenance = 'propagation:community:productsthroughsemrel'") + .count()); + + query = + "select id, MyT.id community, size(MyT.datainfo) datainfosize " + + "from dataset " + + "lateral view explode (context) as MyT " + + "where size(MyT.datainfo) > 0"; + + Assertions.assertEquals( + 2, + spark.sql(query) + .select("datainfosize") + .where( + "id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529' a" + + "nd community = 'covid-19'") + .collectAsList() + .get(0) + .getInt(0)); + } + + @Test + public void bulktagByDatasourceTest() throws Exception { + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass() + .getResource("/eu/dnetlib/dhp/sample/publication/update_datasource") + .getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.Publication", + "-outputPath", + workingDir.toString() + "/publication", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/publication") + .map(item -> OBJECT_MAPPER.readValue(item, Publication.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); + + verificationDataset.createOrReplaceTempView("publication"); + + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from publication " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + + Assertions.assertEquals(5, idExplodeCommunity.count()); + Assertions.assertEquals( + 5, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); + Assertions.assertEquals( + 5, + idExplodeCommunity + .filter("name = 'Bulktagging for Community - Datasource'") + .count()); + + Assertions.assertEquals(3, idExplodeCommunity.filter("community = 'fam'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'aginfra'").count()); + + Assertions.assertEquals( + 3, + idExplodeCommunity + .filter( + "community = 'fam' and (id = '50|ec_fp7health::000085c89f4b96dc2269bd37edb35306' " + + "or id = '50|ec_fp7health::000b9e61f83f5a4b0c35777b7bccdf38' " + + "or id = '50|ec_fp7health::0010eb63e181e3e91b8b6dc6b3e1c798')") + .count()); + + Assertions.assertEquals( + 2, + idExplodeCommunity + .filter( + "community = 'aginfra' and (id = '50|ec_fp7health::000c8195edd542e4e64ebb32172cbf89' " + + "or id = '50|ec_fp7health::0010eb63e181e3e91b8b6dc6b3e1c798')") + .count()); + } + + @Test + public void bulktagByZenodoCommunityTest() throws Exception { + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass() + .getResource( + "/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity") + .getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.OtherResearchProduct", + "-outputPath", + workingDir.toString() + "/orp", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/orp") + .map(item -> OBJECT_MAPPER.readValue(item, OtherResearchProduct.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(OtherResearchProduct.class)); + + verificationDataset.createOrReplaceTempView("orp"); + + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from orp " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + Assertions.assertEquals(8, idExplodeCommunity.count()); + + Assertions.assertEquals( + 8, idExplodeCommunity.filter("provenance = 'community:zenodocommunity'").count()); + Assertions.assertEquals( + 8, + idExplodeCommunity.filter("name = 'Bulktagging for Community - Zenodo'").count()); + + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'covid-19'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'aginfra'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'beopen'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'fam'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'mes'").count()); + + Assertions.assertEquals( + 1, + idExplodeCommunity + .filter( + "id = '50|od______2017::0750a4d0782265873d669520f5e33c07' " + + "and community = 'covid-19'") + .count()); + Assertions.assertEquals( + 3, + idExplodeCommunity + .filter( + "id = '50|od______2017::1bd97baef19dbd2db3203b112bb83bc5' and " + + "(community = 'aginfra' or community = 'mes' or community = 'fam')") + .count()); + Assertions.assertEquals( + 1, + idExplodeCommunity + .filter( + "id = '50|od______2017::1e400f1747487fd15998735c41a55c72' " + + "and community = 'beopen'") + .count()); + Assertions.assertEquals( + 3, + idExplodeCommunity + .filter( + "id = '50|od______2017::210281c5bc1c739a11ccceeeca806396' and " + + "(community = 'beopen' or community = 'fam' or community = 'mes')") + .count()); + + query = + "select id, MyT.id community, size(MyT.datainfo) datainfosize " + + "from orp " + + "lateral view explode (context) as MyT " + + "where size(MyT.datainfo) > 0"; + + Assertions.assertEquals( + 2, + spark.sql(query) + .select("datainfosize") + .where( + "id = '50|od______2017::210281c5bc1c739a11ccceeeca806396' a" + + "nd community = 'beopen'") + .collectAsList() + .get(0) + .getInt(0)); + + // verify the zenodo community context is not present anymore in the records + query = + "select id, MyT.id community " + + "from orp " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD "; + + org.apache.spark.sql.Dataset tmp2 = spark.sql(query); + + Assertions.assertEquals( + 0, + tmp2.select("community") + .where(tmp2.col("community").contains(ZENODO_COMMUNITY_INDICATOR)) + .count()); + } + + @Test + public void bulktagBySubjectDatasourceTest() throws Exception { + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass() + .getResource("/eu/dnetlib/dhp/sample/dataset/update_subject_datasource") + .getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", + workingDir.toString() + "/dataset", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + Assertions.assertEquals(7, idExplodeCommunity.count()); + + Assertions.assertEquals( + 5, idExplodeCommunity.filter("provenance = 'community:subject'").count()); + Assertions.assertEquals( + 2, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'covid-19'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'fam'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'aginfra'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'mes'").count()); + + query = + "select id, MyT.id community, size(MyT.datainfo) datainfosize " + + "from dataset " + + "lateral view explode (context) as MyT " + + "where size(MyT.datainfo) > 0"; + + org.apache.spark.sql.Dataset tmp2 = spark.sql(query); + + Assertions.assertEquals( + 2, + tmp2.select("datainfosize") + .where( + "id = '50|od______3989::05d8c751462f9bb8d2b06956dfbc5c7b' and " + + "community = 'aginfra'") + .collectAsList() + .get(0) + .getInt(0)); + + Assertions.assertEquals( + 1, + tmp2.select("datainfosize") + .where( + "id = '50|od______3989::05d8c751462f9bb8d2b06956dfbc5c7b' and " + + "community = 'covid-19'") + .collectAsList() + .get(0) + .getInt(0)); + + Assertions.assertEquals( + 2, + tmp2.select("datainfosize") + .where( + "id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529' and " + + "community = 'fam'") + .collectAsList() + .get(0) + .getInt(0)); + Assertions.assertEquals( + 2, + tmp2.select("datainfosize") + .where( + "id = '50|od______3989::02dd5d2c222191b0b9bd4f33c8e96529' and " + + "community = 'covid-19'") + .collectAsList() + .get(0) + .getInt(0)); + + Assertions.assertEquals( + 1, + tmp2.select("datainfosize") + .where( + "id = '50|od______3989::0f89464c4ac4c398fe0c71433b175a62' and " + + "community = 'fam'") + .collectAsList() + .get(0) + .getInt(0)); + Assertions.assertEquals( + 1, + tmp2.select("datainfosize") + .where( + "id = '50|od______3989::0f89464c4ac4c398fe0c71433b175a62' and " + + "community = 'mes'") + .collectAsList() + .get(0) + .getInt(0)); + } + + @Test + public void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { + + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/sample/software/").getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.Software", + "-outputPath", + workingDir.toString() + "/software", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/software") + .map(item -> OBJECT_MAPPER.readValue(item, Software.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Software.class)); + + verificationDataset.createOrReplaceTempView("software"); + + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from software " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + Assertions.assertEquals(10, idExplodeCommunity.count()); + + idExplodeCommunity.show(false); + Assertions.assertEquals( + 3, idExplodeCommunity.filter("provenance = 'community:subject'").count()); + Assertions.assertEquals( + 3, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); + Assertions.assertEquals( + 4, idExplodeCommunity.filter("provenance = 'community:zenodocommunity'").count()); + + Assertions.assertEquals(3, idExplodeCommunity.filter("community = 'covid-19'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'dh-ch'").count()); + Assertions.assertEquals(4, idExplodeCommunity.filter("community = 'aginfra'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'dariah'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'fam'").count()); + + Assertions.assertEquals( + 2, + idExplodeCommunity + .filter( + "provenance = 'community:zenodocommunity' and " + + "id = '50|od______1582::4132f5ec9496f0d6adc7b00a50a56ff4' and (" + + "community = 'dh-ch' or community = 'dariah')") + .count()); + + query = + "select id, MyT.id community, size(MyT.datainfo) datainfosize " + + "from software " + + "lateral view explode (context) as MyT " + + "where size(MyT.datainfo) > 0"; + + org.apache.spark.sql.Dataset tmp2 = spark.sql(query); + + Assertions.assertEquals( + 2, + tmp2.select("datainfosize") + .where( + "id = '50|od______1582::501b25d420f808c8eddcd9b16e917f11' and " + + "community = 'covid-19'") + .collectAsList() + .get(0) + .getInt(0)); + + Assertions.assertEquals( + 3, + tmp2.select("datainfosize") + .where( + "id = '50|od______1582::581621232a561b7e8b4952b18b8b0e56' and " + + "community = 'aginfra'") + .collectAsList() + .get(0) + .getInt(0)); + } + + @Test + public void bulktagDatasourcewithConstraintsTest() throws Exception { + + SparkBulkTagJob2.main( + new String[] { + "-isTest", + Boolean.TRUE.toString(), + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + getClass() + .getResource( + "/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints") + .getPath(), + "-taggingConf", + taggingConf, + "-resultTableName", + "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", + workingDir.toString() + "/dataset", + "-isLookupUrl", + "http://beta.services.openaire.eu:8280/is/services/isLookUp", + "-protoMap", + "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = + sc.textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + + Assertions.assertEquals(10, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = + spark.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + + verificationDataset.createOrReplaceTempView("dataset"); + String query = + "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + + idExplodeCommunity.show(false); + Assertions.assertEquals(2, idExplodeCommunity.count()); + + Assertions.assertEquals( + 2, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); + } +} diff --git a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java index 77c4482652..4a855bd3af 100644 --- a/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-bulktag/src/test/java/eu/dnetlib/dhp/CommunityConfigurationFactoryTest.java @@ -18,9 +18,6 @@ import org.junit.jupiter.api.Test; /** Created by miriam on 03/08/2018. */ public class CommunityConfigurationFactoryTest { - private static String xml; - private static String xml1; - private final VerbResolver resolver = new VerbResolver(); @Test diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml new file mode 100644 index 0000000000..dfea7b588c --- /dev/null +++ b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/communityconfiguration/tagging_conf.xml @@ -0,0 +1,1256 @@ + + + + + + + zenodo + + + + + + + + + + re3data_____::a507cdacc5bbcc08761c92185dee5cab + + + + + + + + + SDG13 - Climate action + SDG8 - Decent work and economic + growth + SDG15 - Life on land + SDG2 - Zero hunger + SDG17 - Partnerships for the + goals + SDG10 - Reduced inequalities + SDG5 - Gender equality + SDG12 - Responsible + consumption and production + SDG14 - Life below water + SDG6 - Clean water and + sanitation + SDG11 - Sustainable cities and communities + SDG1 - No poverty + SDG3 - + Good health and well being + SDG7 - Affordable and clean energy + SDG4 - Quality + education + SDG9 - Industry innovation and infrastructure + SDG16 - Peace justice + and strong institutions + + + + + + + + modern art + monuments + europeana data model + sites + field walking + frescoes + LIDO metadata schema + art history + excavation + Arts and Humanities General + cities + coins + temples + numismatics + lithics + roads + environmental archaeology + digital cultural heritage + archaeological reports + history + CRMba + churches + cultural heritage + archaeological stratigraphy + religious art + buidings + digital humanities + survey + archaeological sites + linguistic studies + bioarchaeology + architectural orders + palaeoanthropology + fine arts + europeana + CIDOC CRM + decorations + classic art + stratigraphy + digital archaeology + intangible cultural heritage + walls + humanities + chapels + CRMtex + Language and Literature + paintings + archaeology + fair data + mosaics + burials + architecture + medieval art + castles + CARARE metadata schema + statues + natural language processing + inscriptions + CRMsci + vaults + contemporary art + Arts and Humanities + CRMarchaeo + pottery + site + architectural + vessels + + + + re3data_____::9ebe127e5f3a0bf401875690f3bb6b81 + + + + doajarticles::c6cd4b532e12868c1d760a8d7cda6815 + + + + doajarticles::a6de4499bb87bf3c01add0a9e2c9ed0b + + + + doajarticles::6eb31d13b12bc06bbac06aef63cf33c9 + + + + doajarticles::0da84e9dfdc8419576169e027baa8028 + + + + re3data_____::84e123776089ce3c7a33db98d9cd15a8 + + + + openaire____::c5502a43e76feab55dd00cf50f519125 + + + + re3data_____::a48f09c562b247a9919acfe195549b47 + + + + opendoar____::97275a23ca44226c9964043c8462be96 + + + + + + storm + + + + crosscult + + + + wholodance_eu + + + + digcur2013 + + + + gravitate + + + + dipp2014 + + + + digitalhumanities + + + + dimpo + + + + adho + + + + chc + + + + wahr + + + + ibe + + + + ariadne + + + + parthenos-hub + + + + parthenos-training + + + + gandhara + + + + cmsouthasia + + + + nilgirihills + + + + shamsa_mustecio + + + + bodhgaya + + + + + + + + Stock Assessment + pelagic + Acoustic + Fish farming + Fisheries + Fishermen + maximum sustainable yield + trawler + Fishing vessel + Fisherman + Fishing gear + mackerel + RFMO + Fish Aggregating Device + Bycatch + Fishery + common fisheries policy + Fishing fleet + Aquaculture + + + + doajarticles::8cec81178926caaca531afbd8eb5d64c + + + + doajarticles::0f7a7f30b5400615cae1829f3e743982 + + + + doajarticles::9740f7f5af3e506d2ad2c215cdccd51a + + + + doajarticles::9f3fbaae044fa33cb7069b72935a3254 + + + + doajarticles::cb67f33eb9819f5c624ce0313957f6b3 + + + + doajarticles::e21c97cbb7a209afc75703681c462906 + + + + doajarticles::554cde3be9e5c4588b4c4f9f503120cb + + + + tubitakulakb::11e22f49e65b9fd11d5b144b93861a1b + + + + doajarticles::57c5d3837da943e93b28ec4db82ec7a5 + + + + doajarticles::a186f5ddb8e8c7ecc992ef51cf3315b1 + + + + doajarticles::e21c97cbb7a209afc75703681c462906 + + + + doajarticles::dca64612dfe0963fffc119098a319957 + + + + doajarticles::dd70e44479f0ade25aa106aef3e87a0a + + + + + + discardless + + + + farfish2020 + + + + facts + + + + climefish + + + + proeel + + + + primefish + + + + h2020_vicinaqua + + + + meece + + + + rlsadb + + + + iotc_ctoi + + + + + + + + brain mapping + brain imaging + electroencephalography + arterial spin labelling + brain fingerprinting + brain + neuroimaging + Multimodal Brain Image Analysis + fMRI + neuroinformatics + fetal brain + brain ultrasonic imaging + topographic brain mapping + diffusion tensor imaging + computerized knowledge assessment + connectome mapping + brain magnetic resonance imaging + brain abnormalities + + + + re3data_____::5b9bf9171d92df854cf3c520692e9122 + + + + doajarticles::c7d3de67dc77af72f6747157441252ec + + + + re3data_____::8515794670370f49c1d176c399c714f5 + + + + doajarticles::d640648c84b10d425f96f11c3de468f3 + + + + doajarticles::0c0e74daa5d95504eade9c81ebbd5b8a + + + + rest________::fb1a3d4523c95e63496e3bc7ba36244b + + + + + + neuroinformatics + + + + hbp + + + + from_neuroscience_to_machine_learning + + + + ci2c + + + + opensourcebrain + + + + brainspeak + + + + braincom + + + + nextgenvis + + + + meso-brain + + + + neuroplasticity-workshop + + + + bionics + + + + brainmattrain-676408 + + + + repronim + + + + affectiveneuro + + + + con + + + + lab_neurol_sperim_irfmn_irccs_milano_it + + + + + + + + marine + ocean + fish + aqua + sea + + + + + adriplan + + + + devotes-project + + + + euro-basin + + + + naclim + + + + discardless + + + + assisibf + + + + meece + + + + facts + + + + proeel + + + + aquatrace + + + + myfish + + + + atlas + + + + blue-actionh2020 + + + + sponges + + + + merces_project + + + + bigdataocean + + + + columbus + + + + h2020-aquainvad-ed + + + + aquarius + + + + southern-ocean-observing-system + + + + eawag + + + + mossco + + + + onc + + + + oceanbiogeochemistry + + + + oceanliteracy + + + + openearth + + + + ocean + + + + calcifierraman + + + + bermudabream + + + + brcorp1 + + + + mce + + + + biogeochem + + + + ecc2014 + + + + fisheries + + + + sedinstcjfas + + + + narmada + + + + umr-entropie + + + + farfish2020 + + + + primefish + + + + zf-ilcs + + + + climefish + + + + afrimed_eu + + + + spi-ace + + + + cice-consortium + + + + nemo-ocean + + + + mesopp-h2020 + + + + marxiv + + + + + + + + + + + instruct + + + + west-life + + + + + + + + animal production and health + fisheries and aquaculture + food safety and human nutrition + information management + food technology + agri-food education and extension + natural resources and environment + food system + engineering technology and Research + agriculture + food safety risk assessment + food security + farming practices and systems + plant production and protection + agri-food economics and policy + Agri-food + food distribution + forestry + + + + opendoar____::1a551829d50f1400b0dab21fdd969c04 + + + + opendoar____::49af6c4e558a7569d80eee2e035e2bd7 + + + + opendoar____::0266e33d3f546cb5436a10798e657d97 + + + + opendoar____::fd4c2dc64ccb8496e6f1f94c85f30d06 + + + + opendoar____::41bfd20a38bb1b0bec75acf0845530a7 + + + + opendoar____::87ae6fb631f7c8a627e8e28785d9992d + + + + + + edenis + + + + efsa-pilot + + + + egene3 + + + + efsa-kj + + + + euromixproject + + + + discardless + + + + sedinstcjfst + + + + afinet-kc + + + + 2231-4784 + + + + 2231-0606 + + + + solace + + + + pa17 + + + + smartakis + + + + sedinstcjae + + + + phenology_camera + + + + aginfra + + + + erosa + + + + bigdatagrapes + + + + + + + + + + opendoar____::7e7757b1e12abcb736ab9a754ffb617a + {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} + + + opendoar____::96da2f590cd7246bbde0051047b0d6f7 + {"criteria":[{"constraint":[{"verb":"contains","field":"contributor","value":"DARIAH"}]}]} + + + + + dimpo + + + + + + + + Green Transport + City mobility systems + Vulnerable road users + Traffic engineering + Transport electrification + Mobility + Intermodal freight transport + Clean vehicle fleets + Intelligent mobility + Inflight refueling + District mobility systems + Navigation and control systems for optimised planning and routing + European Space Technology Platform + European Transport networks + Green cars + Inter-modality infrastructures + Advanced Take Off and Landing Ideas + Sustainable urban systems + port-area railway networks + Innovative forms of urban transport + Alliance for Logistics Innovation through Collaboration in Europe + Advisory Council for Aeronautics Research in Europe + Mobility services for people and goods + Guidance and traffic management + Passenger mobility + Smart mobility and services + transport innovation + high-speed railway + Vehicle design + Inland shipping + public transportation + aviation’s climate impact + Road transport + On-demand public transport + Personal Air Transport + Transport + transport vulnerability + Pipeline transport + European Association of Aviation Training and Education Organisations + Defrosting of railway infrastructure + Inclusive and affordable transport + River Information Services + jel:L92 + Increased use of public transport + Seamless mobility + STRIA + trolleybus transport + Intelligent Transport System + Low-emission alternative energy for transport + Shared mobility for people and goods + Business model for urban mobility + Interoperability of transport systems + Cross-border train slot booking + Air transport + Transport pricing + Sustainable transport + European Rail Transport Research Advisory Council + Alternative aircraft configurations + Transport and Mobility + Railways applications + urban transport + Environmental impact of transport + urban freight delivery systems + Automated Road Transport + Alternative fuels in public transport + Active LIDAR-sensor for GHG-measurements + Autonomous logistics operations + Rational use of motorised transport + Network and traffic management systems + electrification of railway wagons + Single European Sky + Electrified road systems + transportation planning + Railway dynamics + Motorway of the Sea + smart railway communications + Maritime transport + Environmental- friendly transport + Combined transport + Connected automated driving technology + Innovative freight logistics services + automated and shared vehicles + Alternative Aircraft Systems + Land-use and transport interaction + Public transport system + Business plan for shared mobility + Shared mobility + Growing of mobility demand + European Road Transport Research Advisory Council + WATERBORNE ETP + Effective transport management system + Short Sea Shipping + air traffic management + Sea hubs and the motorways of the sea + Urban mobility solutions + Smart city planning + Maritime spatial planning + EUropean rail Research Network of Excellence + Transport governance + ENERGY CONSUMPTION BY THE TRANSPORT SECTOR + Integrated urban plan + inland waterway services + European Conference of Transport Research Institutes + air vehicles + E-freight + Automated Driving + Automated ships + pricing for cross-border passenger transport + Vehicle efficiency + Railway transport + Electric vehicles + Road traffic monitoring + Deep sea shipping + Circular economy in transport + Traffic congestion + air transport system + Urban logistics + Rail transport + OpenStreetMap + high speed rail + Transportation engineering + Intermodal travel information + Flight Data Recorders + Advanced driver assistance systems + long distance freight transport + Inland waterway transport + Smart mobility + Mobility integration + Personal Rapid Transit system + Safety measures & requirements for roads + Green rail transport + Electrical + Vehicle manufacturing + Future Airport Layout + Rail technologies + European Intermodal Research Advisory Council + inland navigation + Automated urban vehicles + ECSS-standards + Traveller services + Polluting transport + Air Traffic Control + Cooperative and connected and automated transport + Innovative powertrains + Quality of transport system and services + door-to- door logistics chain + Inter-modal aspects of urban mobility + travel (and mobility) + Innovative freight delivery systems + urban freight delivery infrastructures + + + + doajarticles::1c5bdf8fca58937894ad1441cca99b76 + + + + doajarticles::b37a634324a45c821687e6e80e6f53b4 + + + + doajarticles::4bf64f2a104040e4e055cd9594b2d77c + + + + doajarticles::479ca537c12755d1868bbf02938a900c + + + + doajarticles::55f31df96a60e2309f45b7c265fcf7a2 + + + + doajarticles::c52a09891a5301f9986ebbfe3761810c + + + + doajarticles::379807bc7f6c71a227ef1651462c414c + + + + doajarticles::36069db531a00b85a2e8fb301f4bdc19 + + + + doajarticles::b6a898da311ded96fabf49c520b80d5d + + + + doajarticles::d0753d9180b35a271d8b4a31f449749f + + + + doajarticles::172050a92511838393a3fe237ae47e31 + + + + doajarticles::301ed96c62abb160a3e29796efe5c95c + + + + doajarticles::0f4f805b3d842f2c7f1b077c3426fa59 + + + + doajarticles::ba73728b84437b8d48ae287b867c7215 + + + + doajarticles::86faef424d804309ccf45f692523aa48 + + + + doajarticles::73bd758fa41671de70964c3ecba013af + + + + doajarticles::e661fc0bdb24af42b740a08f0ddc6cf4 + + + + doajarticles::a6d3052047d5dbfbd43d95b4afb0f3d7 + + + + doajarticles::ca61df07089acc53a1569bde6673d82a + + + + doajarticles::237dd6f1606600459d0297abd8ed9976 + + + + doajarticles::fba6191177ede7c51ea1cdf58eae7f8b + + + + + + jsdtl + + + + utc-martrec + + + + utc-uti + + + + stp + + + + c2smart + + + + stride-utc + + + + crowd4roads + + + + lemo + + + + imov3d + + + + tra2018 + + + + optimum + + + + stars + + + + iecteim + + + + iccpt2019 + + + + + + + + COVID-19 + Severe acute respiratory syndrome coronavirus 2 + SARS-CoV-2 + COVID19 + 2019 novel coronavirus + coronavirus disease 2019 + HCoV-19 + mesh:C000657245 + 2019-nCoV + coronavirus disease-19 + mesh:COVID-19 + COVID2019 + + + + opendoar____::358aee4cc897452c00244351e4d91f69 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"sars-cov-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + re3data_____::7b0ad08687b2c960d5aeef06f811d5e6 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + driver______::bee53aa31dc2cbb538c10c2b65fa5824 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + openaire____::437f4b072b1aa198adcbc35910ff3b98 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + openaire____::081b82f96300b6a6e3d282bad31cb6e2 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + openaire____::9e3be59865b2c1c335d32dae2fe7b254 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + opendoar____::8b6dd7db9af49e67306feb59a8bdc52c + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + share_______::4719356ec8d7d55d3feb384ce879ad6c + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + share_______::bbd802baad85d1fd440f32a7a3a2c2b1 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + opendoar____::6f4922f45568161a8cdf4ad2299f6d23 + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]}, + {"constraint":[{"verb":"contains","field":"title","value":"2019-nCoV"}]}]} + + + + re3data_____::7980778c78fb4cf0fab13ce2159030dc + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019-nCov"}]}]} + + + re3data_____::978378def740bbf2bfb420de868c460b + {"criteria":[{"constraint":[{"verb":"contains","field":"title","value":"SARS-CoV-2"}]},{"constraint":[{"verb":"contains","field":"title","value":"COVID-19"}]},{"constraint":[{"verb":"contains","field":"title","value":"2019-nCov"}]}]} + + + + + chicago-covid-19 + + + + covid-19-senacyt-panama-sample + + + + covid-19-tx-rct-stats-review + + + + covid_19_senacyt_abc_panama + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/dataset_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/no_updates/dataset_10.json.gz similarity index 100% rename from dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/dataset_10.json.gz rename to dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/no_updates/dataset_10.json.gz diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz new file mode 100644 index 0000000000..2eb33c5a49 Binary files /dev/null and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_datasourcewithconstraints/dataset_10.json.gz differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz new file mode 100644 index 0000000000..ee62cd7913 Binary files /dev/null and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/contextnoprovenance/dataset_10.json.gz differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext/dataset_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext/dataset_10.json.gz new file mode 100644 index 0000000000..cf3c3aa7b6 Binary files /dev/null and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject/nocontext/dataset_10.json.gz differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject_datasource/dataset_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject_datasource/dataset_10.json.gz new file mode 100644 index 0000000000..fdc76a04c8 Binary files /dev/null and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/dataset/update_subject_datasource/dataset_10.json.gz differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/otherresearchproduct_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/otherresearchproduct_10.json.gz deleted file mode 100644 index 20b6a4dba3..0000000000 Binary files a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/otherresearchproduct_10.json.gz and /dev/null differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz new file mode 100644 index 0000000000..ea9e212bd4 Binary files /dev/null and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/otherresearchproduct/update_zenodocommunity/otherresearchproduct_10.json.gz differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/publication_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/publication_10.json.gz deleted file mode 100644 index 257e0db3a4..0000000000 Binary files a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/publication_10.json.gz and /dev/null differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/update_datasource/publication_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/update_datasource/publication_10.json.gz new file mode 100644 index 0000000000..99c4015e71 Binary files /dev/null and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/publication/update_datasource/publication_10.json.gz differ diff --git a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/software/software_10.json.gz b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/software/software_10.json.gz index a5b8c8774c..3dcadf41d7 100644 Binary files a/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/software/software_10.json.gz and b/dhp-workflows/dhp-bulktag/src/test/resources/eu/dnetlib/dhp/sample/software/software_10.json.gz differ