diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index 8adc889201..040c897829 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -95,7 +95,7 @@ public class SparkAtomicActionScoreJob implements Serializable { return projectScores.map((MapFunction) bipProjectScores -> { Project project = new Project(); - // project.setId(bipProjectScores.getProjectId()); + project.setId(bipProjectScores.getProjectId()); project.setMeasures(bipProjectScores.toMeasures()); return project; }, Encoders.bean(Project.class)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java index cf33c65099..e56cdab725 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java @@ -6,6 +6,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; +import java.util.List; import org.jetbrains.annotations.NotNull; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index 06d0f95c25..27fb37e5b6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -3,14 +3,10 @@ package eu.dnetlib.dhp.api; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Objects; +import java.util.*; import java.util.stream.Collectors; -import javax.management.Query; - +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +20,10 @@ import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; import eu.dnetlib.dhp.bulktag.community.Provider; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; -import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; /** * @author miriam.baglioni @@ -58,7 +57,7 @@ public class Utils implements Serializable { if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) return null; Provider p = new Provider(); - p.setOpenaireId("10|" + d.getOpenaireId()); + p.setOpenaireId(ModelSupport.getIdPrefix(Datasource.class) + "|" + d.getOpenaireId()); p.setSelectionConstraints(d.getSelectioncriteria()); if (p.getSelectionConstraints() != null) p.getSelectionConstraints().setSelection(resolver); @@ -113,6 +112,7 @@ public class Utils implements Serializable { */ public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException { CommunityEntityMap organizationMap = new CommunityEntityMap(); + String entityPrefix = ModelSupport.getIdPrefix(Organization.class); getValidCommunities(baseURL) .forEach(community -> { String id = community.getId(); @@ -124,9 +124,9 @@ public class Utils implements Serializable { if (!organizationMap .keySet() .contains( - "20|" + o)) - organizationMap.put("20|" + o, new ArrayList<>()); - organizationMap.get("20|" + o).add(community.getId()); + entityPrefix + "|" + o)) + organizationMap.put(entityPrefix + "|" + o, new ArrayList<>()); + organizationMap.get(entityPrefix + "|" + o).add(community.getId()); }); } catch (IOException e) { throw new RuntimeException(e); @@ -138,7 +138,7 @@ public class Utils implements Serializable { public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException { CommunityEntityMap projectMap = new CommunityEntityMap(); - + String entityPrefix = ModelSupport.getIdPrefix(Project.class); getValidCommunities(baseURL) .forEach(community -> { int page = -1; @@ -155,9 +155,9 @@ public class Utils implements Serializable { ContentModel.class); if (cm.getContent().size() > 0) { cm.getContent().forEach(p -> { - if (!projectMap.keySet().contains("40|" + p.getOpenaireId())) - projectMap.put("40|" + p.getOpenaireId(), new ArrayList<>()); - projectMap.get("40|" + p.getOpenaireId()).add(community.getId()); + if (!projectMap.keySet().contains(entityPrefix + "|" + p.getOpenaireId())) + projectMap.put(entityPrefix + "|" + p.getOpenaireId(), new ArrayList<>()); + projectMap.get(entityPrefix + "|" + p.getOpenaireId()).add(community.getId()); }); } } catch (IOException e) { @@ -174,4 +174,41 @@ public class Utils implements Serializable { .map(community -> community.getId()) .collect(Collectors.toList()); } + + public static List getDatasourceCommunities(String baseURL) throws IOException { + List validCommunities = getValidCommunities(baseURL); + HashMap> map = new HashMap<>(); + String entityPrefix = ModelSupport.getIdPrefix(Datasource.class) + "|"; + + validCommunities.forEach(c -> { + try { + new ObjectMapper() + .readValue(QueryCommunityAPI.communityDatasource(c.getId(), baseURL), DatasourceList.class) + .forEach(d -> { + if (!map.keySet().contains(d.getOpenaireId())) + map.put(d.getOpenaireId(), new HashSet<>()); + + map.get(d.getOpenaireId()).add(c.getId()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + List temp = map + .keySet() + .stream() + .map(k -> EntityCommunities.newInstance(entityPrefix + k, getCollect(k, map))) + .collect(Collectors.toList()); + + return temp; + + } + + @NotNull + private static List getCollect(String k, HashMap> map) { + List temp = map.get(k).stream().collect(Collectors.toList()); + return temp; + } + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/EntityCommunities.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/EntityCommunities.java new file mode 100644 index 0000000000..cac02c0721 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/EntityCommunities.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.api.model; + +import java.io.Serializable; +import java.util.List; + +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Datasource; + +/** + * @author miriam.baglioni + * @Date 13/02/24 + */ +public class EntityCommunities implements Serializable { + private String entityId; + private List communitiesId; + + public String getEntityId() { + return entityId; + } + + public void setEntityId(String entityId) { + this.entityId = entityId; + } + + public List getCommunitiesId() { + return communitiesId; + } + + public void setCommunitiesId(List communitiesId) { + this.communitiesId = communitiesId; + } + + public static EntityCommunities newInstance(String dsid, List csid) { + EntityCommunities dsc = new EntityCommunities(); + dsc.entityId = dsid; + dsc.communitiesId = csid; + return dsc; + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index e20fcb081a..b09543da17 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -4,9 +4,23 @@ package eu.dnetlib.dhp.bulktag; import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.stream.Collectors; +import java.util.zip.GZIPOutputStream; +import org.apache.avro.TestAnnotation; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; @@ -17,17 +31,24 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; +import com.sun.media.sound.ModelInstrumentComparator; import eu.dnetlib.dhp.api.Utils; +import eu.dnetlib.dhp.api.model.CommunityEntityMap; +import eu.dnetlib.dhp.api.model.EntityCommunities; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; -import eu.dnetlib.dhp.schema.common.EntityType; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Context; import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import scala.Tuple2; public class SparkBulkTagJob { @@ -47,6 +68,7 @@ public class SparkBulkTagJob { .getResourceAsStream( "/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json")); + log.info(args.toString()); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -65,8 +87,20 @@ public class SparkBulkTagJob { final String baseURL = parser.get("baseURL"); log.info("baseURL: {}", baseURL); - ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class); - log.info("pathMap: {}", new Gson().toJson(protoMappingParams)); + log.info("pathMap: {}", parser.get("pathMap")); + String protoMappingPath = parser.get("pathMap"); + + final String hdfsNameNode = parser.get("nameNode"); + log.info("nameNode: {}", hdfsNameNode); + + Configuration configuration = new Configuration(); + configuration.set("fs.defaultFS", hdfsNameNode); + FileSystem fs = FileSystem.get(configuration); + + String temp = IOUtils.toString(fs.open(new Path(protoMappingPath)), StandardCharsets.UTF_8); + log.info("protoMap: {}", temp); + ProtoMap protoMap = new Gson().fromJson(temp, ProtoMap.class); + log.info("pathMap: {}", new Gson().toJson(protoMap)); SparkConf conf = new SparkConf(); CommunityConfiguration cc; @@ -88,10 +122,130 @@ public class SparkBulkTagJob { isSparkSessionManaged, spark -> { extendCommunityConfigurationForEOSC(spark, inputPath, cc); - execBulkTag(spark, inputPath, outputPath, protoMappingParams, cc); + execBulkTag( + spark, inputPath, outputPath, protoMap, cc); + execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL)); + execProjectTag(spark, inputPath, outputPath, Utils.getCommunityProjects(baseURL)); }); } + private static void execProjectTag(SparkSession spark, String inputPath, String outputPath, + CommunityEntityMap communityProjects) { + Dataset projects = readPath(spark, inputPath + "project", Project.class); + Dataset pc = spark + .createDataset( + communityProjects + .keySet() + .stream() + .map(k -> EntityCommunities.newInstance(k, communityProjects.get(k))) + .collect(Collectors.toList()), + Encoders.bean(EntityCommunities.class)); + + projects + .joinWith(pc, projects.col("id").equalTo(pc.col("entityId")), "left") + .map((MapFunction, Project>) t2 -> { + Project ds = t2._1(); + if (t2._2() != null) { + List context = Optional + .ofNullable(ds.getContext()) + .map(v -> v.stream().map(c -> c.getId()).collect(Collectors.toList())) + .orElse(new ArrayList<>()); + + if (!Optional.ofNullable(ds.getContext()).isPresent()) + ds.setContext(new ArrayList<>()); + t2._2().getCommunitiesId().forEach(c -> { + if (!context.contains(c)) { + Context con = new Context(); + con.setId(c); + con + .setDataInfo( + Arrays + .asList( + OafMapperUtils + .dataInfo( + false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + TaggingConstants.CLASS_ID_DATASOURCE, + TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "1"))); + ds.getContext().add(con); + } + }); + } + return ds; + }, Encoders.bean(Project.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "project"); + + readPath(spark, outputPath + "project", Datasource.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + "project"); + } + + private static void execDatasourceTag(SparkSession spark, String inputPath, String outputPath, + List datasourceCommunities) { + Dataset datasource = readPath(spark, inputPath + "datasource", Datasource.class); + + Dataset dc = spark + .createDataset(datasourceCommunities, Encoders.bean(EntityCommunities.class)); + + datasource + .joinWith(dc, datasource.col("id").equalTo(dc.col("entityId")), "left") + .map((MapFunction, Datasource>) t2 -> { + Datasource ds = t2._1(); + if (t2._2() != null) { + + List context = Optional + .ofNullable(ds.getContext()) + .map(v -> v.stream().map(c -> c.getId()).collect(Collectors.toList())) + .orElse(new ArrayList<>()); + + if (!Optional.ofNullable(ds.getContext()).isPresent()) + ds.setContext(new ArrayList<>()); + + t2._2().getCommunitiesId().forEach(c -> { + if (!context.contains(c)) { + Context con = new Context(); + con.setId(c); + con + .setDataInfo( + Arrays + .asList( + OafMapperUtils + .dataInfo( + false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false, + OafMapperUtils + .qualifier( + TaggingConstants.CLASS_ID_DATASOURCE, + TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE, + ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "1"))); + ds.getContext().add(con); + } + }); + } + return ds; + }, Encoders.bean(Datasource.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "datasource"); + + readPath(spark, outputPath + "datasource", Datasource.class) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + "datasource"); + } + private static void extendCommunityConfigurationForEOSC(SparkSession spark, String inputPath, CommunityConfiguration cc) { @@ -129,6 +283,11 @@ public class SparkBulkTagJob { ProtoMap protoMappingParams, CommunityConfiguration communityConfiguration) { + try { + System.out.println(new ObjectMapper().writeValueAsString(protoMappingParams)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } ModelSupport.entityTypes .keySet() .parallelStream() diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java index 444e8b82dd..52ed373379 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/actions/ExecSubstringAction.java @@ -38,7 +38,6 @@ public class ExecSubstringAction implements Serializable { } public String execSubstring() { - return this.value.substring(Integer.valueOf(this.from), Integer.valueOf(this.to)); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties index 05db040903..52c2cafcee 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -1,4 +1,4 @@ -sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched +sourcePath=/tmp/beta_provision/graph/09_graph_orcid_enriched resumeFrom=ResultProject allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo allowedsemrelsresultproject=isSupplementedBy;isSupplementTo @@ -7,20 +7,23 @@ datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926 #allowedtypes=pubsrepository::institutional allowedtypes=Institutional outputPath=/tmp/miriam/graph/11_graph_orcid -pathMap ={"author":"$['author'][*]['fullname']", \ - "title":"$['title'][*]['value']",\ - "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ - "orcid_pending":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']" ,\ - "contributor" : "$['contributor'][*]['value']",\ - "description" : "$['description'][*]['value']",\ - "subject" :"$['subject'][*]['value']" , \ - "fos" : "$['subject'][?(@['qualifier']['classid']=='FOS')].value" ,\ - "sdg" : "$['subject'][?(@['qualifier']['classid']=='SDG')].value",\ - "journal":"$['journal'].name",\ - "hostedby":"$['instance'][*]['hostedby']['key']",\ - "collectedfrom":"$['instance'][*]['collectedfrom']['key']",\ - "publisher":"$['publisher'].value",\ - "publicationyear":"$['dateofacceptance'].value"} +pathMap ={"author":{"path":"$['author'][*]['fullname']"}, \ + "title":{"path":"$['title'][*]['value']"},\ + "orcid":{"path":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']"} ,\ + "orcid_pending":{"path":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']"} ,\ + "contributor" : {"path":"$['contributor'][*]['value']"},\ + "description" : {"path":"$['description'][*]['value']"},\ + "subject" :{"path":"$['subject'][*]['value']"}, \ + "fos" : {"path":"$['subject'][?(@['qualifier']['classid']=='FOS')].value"} ,\ + "sdg" : {"path":"$['subject'][?(@['qualifier']['classid']=='SDG')].value"},\ + "journal":{"path":"$['journal'].name"},\ + "hostedby":{"path":"$['instance'][*]['hostedby']['key']"},\ + "collectedfrom":{"path":"$['instance'][*]['collectedfrom']['key']"},\ + "publisher":{"path":"$['publisher'].value"},\ + "publicationyear":{"path":"$['dateofacceptance'].value", "action":{"class":"eu.dnetlib.dhp.bulktag.actions.ExecSubstringAction",\ + "method":"execSubstring",\ + "params":[{"param_name":"From","param_value":0},\ + {"param_name":"To","param_value":4}]}}} blacklist=empty allowedpids=orcid;orcid_pending baseURL = https://services.openaire.eu/openaire/community/ diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json index ce1a8ecab6..36c9600fec 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json @@ -33,6 +33,11 @@ "paramName": "bu", "paramLongName": "baseURL", "paramDescription": "this parameter is to specify the api to be queried (beta or production)", - "paramRequired": false - } + "paramRequired": true + },{ + "paramName": "nn", + "paramLongName": "nameNode", + "paramDescription": "this parameter is to specify the api to be queried (beta or production)", + "paramRequired": true +} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml index 2695253e62..c1675239c9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml @@ -53,10 +53,10 @@ memoryOverhead - 3G + 4G partitions - 3284 + 15000 \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index c7a9e8a263..c4b4b7d641 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -76,6 +76,7 @@ --outputPath${workingDir}/bulktag/ --pathMap${pathMap} --baseURL${baseURL} + --nameNode${nameNode} diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 433e046cc8..a5280a3b30 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -6,14 +6,19 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -25,14 +30,16 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; +import eu.dnetlib.dhp.bulktag.community.ProtoMap; import eu.dnetlib.dhp.schema.oaf.*; public class BulkTagJobTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static final String pathMap = "{\"author\":{\"path\":\"$['author'][*]['fullname']\"}," + + public static final String pathMap = "{\"protoMap\":{\"author\":{\"path\":\"$['author'][*]['fullname']\"}," + " \"title\":{\"path\":\"$['title'][*]['value']\"}, " + " \"orcid\":{\"path\":\"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']\"} , " + " \"orcid_pending\":{\"path\":\"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']\"} ," @@ -51,7 +58,7 @@ public class BulkTagJobTest { "\"method\":\"execSubstring\"," + "\"params\":[" + "{\"paramName\":\"From\", \"paramValue\":0}, " + - "{\"paramName\":\"To\",\"paramValue\":4}]}}}"; + "{\"paramName\":\"To\",\"paramValue\":4}]}}}}"; private static SparkSession spark; @@ -231,6 +238,14 @@ public class BulkTagJobTest { @Test void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/bulktag/pathMap/") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap")); + final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/") @@ -246,7 +261,8 @@ public class BulkTagJobTest { "-outputPath", workingDir.toString() + "/", - "-pathMap", pathMap + "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap", + "-nameNode", "local" }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); @@ -316,6 +332,7 @@ public class BulkTagJobTest { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") .getPath(); + SparkBulkTagJob .main( new String[] { @@ -325,7 +342,7 @@ public class BulkTagJobTest { "-taggingConf", taggingConf, "-outputPath", workingDir.toString() + "/", - + "-baseURL", "https://services.openaire.eu/openaire/community/", "-pathMap", pathMap }); @@ -383,6 +400,71 @@ public class BulkTagJobTest { .count()); } + @Test + void datasourceTag() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") + .getPath(); + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/bulktag/pathMap/") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap")); + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + "-baseURL", "https://services.openaire.eu/openaire/community/", + + "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap", + "-nameNode", "local" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/datasource") + .map(item -> OBJECT_MAPPER.readValue(item, Datasource.class)); + + Assertions.assertEquals(3, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Datasource.class)); + + verificationDataset.createOrReplaceTempView("datasource"); + + String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from datasource " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + + idExplodeCommunity.show(false); + + Assertions.assertEquals(3, idExplodeCommunity.count()); + Assertions + .assertEquals( + 3, idExplodeCommunity.filter("provenance = 'community:datasource'").count()); + Assertions + .assertEquals( + 3, + idExplodeCommunity + .filter("name = 'Bulktagging for Community - Datasource'") + .count()); + + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'dh-ch'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'clarin'").count()); + + } + @Test void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() @@ -1699,4 +1781,40 @@ public class BulkTagJobTest { } + @Test + public void prova() throws Exception { + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/bulktag/pathMap/") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap")); + + final String sourcePath = getClass() + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance/") + .getPath(); + + ProtoMap prova = new Gson() + .fromJson( + "{\"author\":{\"path\":\"$['author'][]['fullname']\"},\"title\":{\"path\":\"$['title'][]['value']\"},\"orcid\":{\"path\":\"$['author'][]['pid'][][?(@['qualifier']['classid']=='orcid')]['value']\"},\"orcid_pending\":{\"path\":\"$['author'][]['pid'][][?(@['qualifier']['classid']=='orcid_pending')]['value']\"},\"contributor\":{\"path\":\"$['contributor'][]['value']\"},\"description\":{\"path\":\"$['description'][]['value']\"},\"subject\":{\"path\":\"$['subject'][]['value']\"},\"fos\":{\"path\":\"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"},\"sdg\":{\"path\":\"$['subject'][?(@['qualifier']['classid']=='SDG')].value\"},\"journal\":{\"path\":\"$['journal'].name\"},\"hostedby\":{\"path\":\"$['instance'][]['hostedby']['key']\"},\"collectedfrom\":{\"path\":\"$['instance'][*]['collectedfrom']['key']\"},\"publisher\":{\"path\":\"$['publisher'].value\"},\"publicationyear\":{\"path\":\"$['dateofacceptance'].value\",\"action\":{\"clazz\":\"eu.dnetlib.dhp.bulktag.actions.ExecSubstringAction\",\"method\":\"execSubstring\",\"params\":[{\"paramName\":\"From\",\"paramValue\":0},{\"paramName\":\"To\",\"paramValue\":4}]}}}", + ProtoMap.class); + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + + "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap", + "-baseURL", "none", + "-nameNode", "local" + }); + + } + } diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/pathMap/pathMap b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/pathMap/pathMap new file mode 100644 index 0000000000..e7bbfe941c --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/pathMap/pathMap @@ -0,0 +1,58 @@ +{ + "author":{ + "path":"$['author'][*]['fullname']" + }, + "title":{ + "path":"$['title'][*]['value']" + }, + "orcid":{ + "path":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" + }, + "orcid_pending":{ + "path":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']" + }, + "contributor":{ + "path":"$['contributor'][*]['value']" + }, + "description":{ + "path":"$['description'][*]['value']" + }, + "subject":{ + "path":"$['subject'][*]['value']" + }, + "fos":{ + "path":"$['subject'][?(@['qualifier']['classid']=='FOS')].value" + }, + "sdg":{ + "path":"$['subject'][?(@['qualifier']['classid']=='SDG')].value" + }, + "journal":{ + "path":"$['journal'].name" + }, + "hostedby":{ + "path":"$['instance'][*]['hostedby']['key']" + }, + "collectedfrom":{ + "path":"$['instance'][*]['collectedfrom']['key']" + }, + "publisher":{ + "path":"$['publisher'].value" + }, + "publicationyear":{ + "path":"$['dateofacceptance'].value", + "action":{ + "clazz":"eu.dnetlib.dhp.bulktag.actions.ExecSubstringAction", + "method":"execSubstring", + "params":[ + { + "paramName":"From", + "paramValue":0 + }, + { + "paramName":"To", + "paramValue":4 + } + ] + } + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource index e69de29bb2..98cd3649ad 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/datasource @@ -0,0 +1,3 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"consenttermsofuse":false,"contentpolicies":[{"classid":"Journal articles","classname":"Journal articles","schemeid":"eosc:contentpolicies","schemename":"eosc:contentpolicies"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"Journal archive","classname":"Journal archive","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2019-07-26","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tobacco Use Insights"},"eoscdatasourcetype":{"classid":"Journal archive","classname":"Journal Archive","schemeid":"dnet:eosc_datasource_types","schemename":"dnet:eosc_datasource_types"},"eosctype":{"classid":"Data Source","classname":"Data Source","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|re3data_____::a507cdacc5bbcc08761c92185dee5cab","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1179-173X","issnPrinted":"","name":"Tobacco Use Insights"},"languages":[],"lastupdatetimestamp":1680789947124,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj1179173X"},"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tobacco Use Insights"},"openairecompatibility":{"classid":"openaire3.0","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1179-173X"],"pid":[],"policies":[],"researchentitytypes":["Literature"],"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Medicine: Public aspects of medicine"}],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://journals.sagepub.com/home/tui"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::2e06c1122c7df43765fdcf91080824fa","value":"EOSC Service Catalogue"}],"consenttermsofuse":false,"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eosc@ill.eu"},"contentpolicies":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2022-07-13","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue is a database of software used mainly for data analysis of neutron and photon experiments. This database can be freely consulted. It gives an overview of software available for neutron and photon experiments and their use with respect to instruments at experimental facilities."},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"eosctype":{"classid":"Service","classname":"Service","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|doajarticles::c6cd4b532e12868c1d760a8d7cda6815","languages":["eng"],"lastupdatetimestamp":1680789947124,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/bundles/app/images/pandata-logo.png"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eoscdbb03112"},"odlanguages":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eng"}],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"openairecompatibility":{"classid":"openaire3.0","classname":"Not yet registered","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["eosc________::ill::ill.panosc_software_catalogue"],"pid":[],"policies":[],"researchentitytypes":[],"researchproductaccesspolicies":[],"researchproductmetadataaccesspolicies":[],"subjects":[],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::2e06c1122c7df43765fdcf91080824fa","value":"EOSC Service Catalogue"}],"consenttermsofuse":false,"contactemail":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eosc@ill.eu"},"contentpolicies":[],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"datasourcetype":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datasourcetypeui":{"classid":"service","classname":"service","schemeid":"dnet:datasource_typologies_ui","schemename":"dnet:datasource_typologies_ui"},"dateofcollection":"2022-07-13","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue is a database of software used mainly for data analysis of neutron and photon experiments. This database can be freely consulted. It gives an overview of software available for neutron and photon experiments and their use with respect to instruments at experimental facilities."},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"eosctype":{"classid":"Service","classname":"Service","schemeid":"","schemename":""},"extraInfo":[],"fulltextdownload":false,"id":"10|eosc________::7ef2576047f040612b983a27347471fc","languages":["eng"],"lastupdatetimestamp":1680789947124,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"logourl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/bundles/app/images/pandata-logo.png"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eoscdbb03112"},"odlanguages":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"eng"}],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"PaNOSC Software Catalogue"},"openairecompatibility":{"classid":"openaire3.0","classname":"Not yet registered","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["eosc________::ill::ill.panosc_software_catalogue"],"pid":[],"policies":[],"researchentitytypes":[],"researchproductaccesspolicies":[],"researchproductmetadataaccesspolicies":[],"subjects":[],"thematic":false,"versioncontrol":false,"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://software.pan-data.eu/"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pom.xml b/pom.xml index 8e6f16fe54..7387f6e3bb 100644 --- a/pom.xml +++ b/pom.xml @@ -888,6 +888,7 @@ 3.3.3 3.4.2 [2.12,3.0) + [5.17.3] [6.1.0] [4.0.3] [6.0.5]