From 83bb97be8337a2c26ed5eaed1d5edeb8d5890b54 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 14 Feb 2024 11:23:47 +0100 Subject: [PATCH] [Tagging Projects and Datasource] added test to check datasource tagging. Fixed issue --- .../main/java/eu/dnetlib/dhp/api/Utils.java | 13 +++- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 25 ++++++-- .../bulktag/input_bulkTag_parameters.json | 2 +- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 61 ++++++++++++++++++- .../publication/update_datasource/datasource | 3 + .../publication/update_datasource/project | 0 6 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index bbdfbb96e..c990d6ebe 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -10,6 +10,7 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Project; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -177,6 +178,8 @@ public class Utils implements Serializable { public static List getDatasourceCommunities(String baseURL)throws IOException{ List validCommunities = getValidCommunities(baseURL); HashMap> map = new HashMap<>(); + String entityPrefix = ModelSupport.getIdPrefix(Datasource.class) + "|" ; + validCommunities.forEach(c -> { try { new ObjectMapper().readValue(QueryCommunityAPI.communityDatasource(c.getId(), baseURL), DatasourceList.class) @@ -191,10 +194,18 @@ public class Utils implements Serializable { } }); + List temp = map.keySet().stream() + .map(k -> EntityCommunities.newInstance(entityPrefix + k, getCollect(k, map))).collect(Collectors.toList()); - return map.keySet().stream().map(k -> EntityCommunities.newInstance(k, map.get(k).stream().collect(Collectors.toList()))).collect(Collectors.toList()); + return temp; } + @NotNull + private static List getCollect(String k, HashMap> map) { + List temp = map.get(k).stream().collect(Collectors.toList()); + return temp; + } + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 4bd770722..33c416b0d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -9,7 +9,6 @@ import java.util.stream.Collectors; import eu.dnetlib.dhp.api.model.CommunityEntityMap; import eu.dnetlib.dhp.api.model.EntityCommunities; -import eu.dnetlib.dhp.api.model.DatasourceCommunitiesList; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Project; @@ -109,14 +108,20 @@ public class SparkBulkTagJob { .map((MapFunction, Project>) t2 -> { Project ds = t2._1(); if (t2._2() != null){ - List context = ds.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()); + List context = + Optional.ofNullable(ds.getContext()) + .map(v -> v.stream().map(c -> c.getId()).collect(Collectors.toList())) + .orElse(new ArrayList<>()); + + if(!Optional.ofNullable(ds.getContext()).isPresent()) + ds.setContext(new ArrayList<>()); t2._2().getCommunitiesId().forEach(c -> { if(!context.contains(c)){ Context con = new Context(); con.setId(c); con.setDataInfo(Arrays.asList(OafMapperUtils.dataInfo(false,TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils.qualifier(TaggingConstants.CLASS_ID_DATASOURCE, TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), "1"))); - ds.getContext().add(con) + ds.getContext().add(con); } }); } @@ -139,19 +144,27 @@ public class SparkBulkTagJob { Dataset datasource = readPath(spark, inputPath + "datasource", Datasource.class); Dataset dc = spark.createDataset(datasourceCommunities, Encoders.bean(EntityCommunities.class)); - + datasource.joinWith(dc, datasource.col("id").equalTo(dc.col("entityId")), "left") .map((MapFunction, Datasource>) t2 -> { Datasource ds = t2._1(); if (t2._2() != null){ - List context = ds.getContext().stream().map(c -> c.getId()).collect(Collectors.toList()); + + List context = + Optional.ofNullable(ds.getContext()) + .map(v -> v.stream().map(c -> c.getId()).collect(Collectors.toList())) + .orElse(new ArrayList<>()); + + if(!Optional.ofNullable(ds.getContext()).isPresent()) + ds.setContext(new ArrayList<>()); + t2._2().getCommunitiesId().forEach(c -> { if(!context.contains(c)){ Context con = new Context(); con.setId(c); con.setDataInfo(Arrays.asList(OafMapperUtils.dataInfo(false,TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils.qualifier(TaggingConstants.CLASS_ID_DATASOURCE, TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), "1"))); - ds.getContext().add(con) + ds.getContext().add(con); } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json index ce1a8ecab..f8fe91223 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json @@ -33,6 +33,6 @@ "paramName": "bu", "paramLongName": "baseURL", "paramDescription": "this parameter is to specify the api to be queried (beta or production)", - "paramRequired": false + "paramRequired": true } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 01be4d0a3..882e259cf 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -325,7 +325,7 @@ public class BulkTagJobTest { "-taggingConf", taggingConf, "-outputPath", workingDir.toString() + "/", - + "-baseURL", "https://services.openaire.eu/openaire/community/", "-pathMap", pathMap }); @@ -349,6 +349,8 @@ public class BulkTagJobTest { org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + idExplodeCommunity.show(false); + Assertions.assertEquals(5, idExplodeCommunity.count()); Assertions .assertEquals( @@ -383,6 +385,63 @@ public class BulkTagJobTest { .count()); } + @Test + void datasourceTag() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") + .getPath(); + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + "-baseURL", "https://services.openaire.eu/openaire/community/", + "-pathMap", pathMap + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/datasource") + .map(item -> OBJECT_MAPPER.readValue(item, Datasource.class)); + + Assertions.assertEquals(3, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Datasource.class)); + + verificationDataset.createOrReplaceTempView("datasource"); + + String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from datasource " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + + idExplodeCommunity.show(false); + + Assertions.assertEquals(3, idExplodeCommunity.count()); + Assertions + .assertEquals( + 3, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); + Assertions + .assertEquals( + 3, + idExplodeCommunity + .filter("name = 'Bulktagging for Community - Datasource'") + .count()); + + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'dh-ch'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'clarin'").count()); + + + } + @Test void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource index e69de29bb..98cd3649a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource @@ -0,0 +1,3 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"consenttermsofuse":false,"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2019-07-26","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tobacco Use Insights"},"eoscdatasourcetype":{"classid":"Journal archive","classname":"Journal Archive","schemeid":"dnet:eosc_datasource_types","schemename":"dnet:eosc_datasource_types"},"eosctype":{"classid":"Data Source","classname":"Data Source","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|re3data_____::a507cdacc5bbcc08761c92185dee5cab","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1179-173X","issnPrinted":"","name":"Tobacco Use Insights"},"languages":[],"lastupdatetimestamp":1680789947124,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj1179173X"},"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tobacco Use Insights"},"openairecompatibility":{"classid":"openaire3.0","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1179-173X"],"pid":[],"policies":[],"researchentitytypes":["Literature"],"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Public aspects of medicine"}],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://journals.sagepub.com/home/tui"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::2e06c1122c7df43765fdcf91080824fa","value":"EOSC Service Catalogue"}],"consenttermsofuse":false,"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eosc@ill.eu"},"contentpolicies":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2022-07-13","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue is a database of software used mainly for data analysis of neutron and photon experiments. This database can be freely consulted. It gives an overview of software available for neutron and photon experiments and their use with respect to instruments at experimental facilities."},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"eosctype":{"classid":"Service","classname":"Service","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815","languages":["eng"],"lastupdatetimestamp":1680789947124,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/bundles/app/images/pandata-logo.png"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eoscdbb03112"},"odlanguages":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eng"}],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"openairecompatibility":{"classid":"openaire3.0","classname":"Not yet registered","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["eosc________::ill::ill.panosc_software_catalogue"],"pid":[],"policies":[],"researchentitytypes":[],"researchproductaccesspolicies":[],"researchproductmetadataaccesspolicies":[],"subjects":[],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::2e06c1122c7df43765fdcf91080824fa","value":"EOSC Service Catalogue"}],"consenttermsofuse":false,"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eosc@ill.eu"},"contentpolicies":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2022-07-13","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue is a database of software used mainly for data analysis of neutron and photon experiments. This database can be freely consulted. It gives an overview of software available for neutron and photon experiments and their use with respect to instruments at experimental facilities."},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"eosctype":{"classid":"Service","classname":"Service","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|eosc________::7ef2576047f040612b983a27347471fc","languages":["eng"],"lastupdatetimestamp":1680789947124,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/bundles/app/images/pandata-logo.png"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eoscdbb03112"},"odlanguages":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eng"}],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"openairecompatibility":{"classid":"openaire3.0","classname":"Not yet registered","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["eosc________::ill::ill.panosc_software_catalogue"],"pid":[],"policies":[],"researchentitytypes":[],"researchproductaccesspolicies":[],"researchproductmetadataaccesspolicies":[],"subjects":[],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project new file mode 100644 index 000000000..e69de29bb