Compare commits
65 Commits
main...singleComm

Author | SHA1 | Date |
---|---|---|
Miriam Baglioni | 8981a82011 | |
Miriam Baglioni | eb0762622c | |
Miriam Baglioni | a469d79b84 | |
Miriam Baglioni | 251178aca8 | |
Miriam Baglioni | 7cf1f49d5e | |
Miriam Baglioni | 7465fa3f20 | |
Miriam Baglioni | bc501f41f6 | |
Miriam Baglioni | 80a7170794 | |
Miriam Baglioni | 08e731916b | |
Miriam Baglioni | 50d13a1d74 | |
Miriam Baglioni | 8c4c74a640 | |
Miriam Baglioni | 6179deb836 | |
Miriam Baglioni | 04a0d1ba6e | |
Miriam Baglioni | 6b51b69cf7 | |
Miriam Baglioni | bd4b6b053d | |
Miriam Baglioni | 26b34201ec | |
Miriam Baglioni | 3d94c12d6e | |
Miriam Baglioni | 95c5f97259 | |
Miriam Baglioni | eaf86828e6 | |
Miriam Baglioni | c58206c3ba | |
Miriam Baglioni | 3e3a45d930 | |
Miriam Baglioni | 46a322b770 | |
Miriam Baglioni | f95ec49a59 | |
Miriam Baglioni | c52355b516 | |
Miriam Baglioni | e1af14833d | |
Miriam Baglioni | 22f4930479 | |
Miriam Baglioni | 7f9b7cfcf6 | |
Miriam Baglioni | 66d64947af | |
Miriam Baglioni | ad6d0ca9eb | |
Miriam Baglioni | 26cf32c066 | |
Miriam Baglioni | 5022f1b50d | |
Miriam Baglioni | 0421f5e1d8 | |
Miriam Baglioni | 2c209e1140 | |
Miriam Baglioni | b3f02083e7 | |
Miriam Baglioni | 8d28ca9815 | |
Miriam Baglioni | 152ba8e2ef | |
Miriam Baglioni | c0c225f3b2 | |
Miriam Baglioni | daabc370c5 | |
Miriam Baglioni | f93356f690 | |
Miriam Baglioni | f7714645d2 | |
Miriam Baglioni | 4632795f25 | |
Miriam Baglioni | 870ee28dd6 | |
Miriam Baglioni | 08f8dd9454 | |
Miriam Baglioni | e5463fea01 | |
Miriam Baglioni | 16c1a27852 | |
Miriam Baglioni | d0c94462e4 | |
Miriam Baglioni | a896febc02 | |
Miriam Baglioni | 5dea729de3 | |
Miriam Baglioni | 200e7e9c46 | |
Miriam Baglioni | 931b2a2e15 | |
Miriam Baglioni | 330343937c | |
Miriam Baglioni | defbb71561 | |
Miriam Baglioni | 17049f8bde | |
Miriam Baglioni | cc11ee1cb9 | |
Miriam Baglioni | 871e5bea29 | |
Miriam Baglioni | 5d92df0627 | |
Miriam Baglioni | 9841086ef3 | |
Miriam Baglioni | d4ad740c98 | |
Miriam Baglioni | a684e1065e | |
Miriam Baglioni | f7c35e6311 | |
Miriam Baglioni | 9bdadd4ddb | |
Miriam Baglioni | 0d76e039cf | |
Miriam Baglioni | 7c86e66697 | |
Miriam Baglioni | bc09d37e8c | |
Miriam Baglioni | 815c7c11aa | |
@@ -90,6 +90,13 @@ public class MakeTarArchive implements Serializable {
			String p_string = p.toString();
			if (!p_string.endsWith("_SUCCESS")) {
				String name = p_string.substring(p_string.lastIndexOf("/") + 1);
				if (name.startsWith("part-") & name.length() > 10) {
					String tmp = name.substring(0, 10);
					if (name.contains(".")) {
						tmp += name.substring(name.indexOf("."));
					}
					name = tmp;
				}
				TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
				entry.setSize(fileStatus.getLen());
				current_size += fileStatus.getLen();

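As an illustration of the renaming added above: a Spark-style part file name such as "part-00000-81f2b1c0-c000.json.gz" (hypothetical value, not taken from the patch) keeps its first 10 characters and re-appends everything from the first dot, yielding "part-00000.json.gz".

```java
// Hypothetical input, for illustration only: a typical Spark output file name.
String name = "part-00000-81f2b1c0-c000.json.gz";
if (name.startsWith("part-") & name.length() > 10) {
	String tmp = name.substring(0, 10); // "part-00000"
	if (name.contains(".")) {
		tmp += name.substring(name.indexOf(".")); // + ".json.gz"
	}
	name = tmp; // "part-00000.json.gz"
}
```
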
@@ -4,7 +4,7 @@ package eu.dnetlib.dhp.schema.dump.oaf;
import java.io.Serializable;

/**
 * Used to refer to the Article Processing Charge information. Not dumped in this release. It contains two parameters: -
 * Used to refer to the Article Processing Charge information. It contains two parameters: -
 * currency of type String to store the currency of the APC - amount of type String to stores the charged amount
 */
public class APC implements Serializable {

@@ -12,9 +12,11 @@ import java.util.List;
 * type of type String to store the type of the instance as defined in the corresponding dnet vocabulary
 * (dnet:pubication_resource). It corresponds to the instancetype.classname of the instance to be mapped - url of type
 * List<String> list of locations where the instance is accessible. It corresponds to url of the instance to be dumped -
 * publicationdate of type String to store the publication date of the instance ;// dateofacceptance; - refereed of type
 * String to store information abour tthe review status of the instance. Possible values are 'Unknown',
 * publicationdate of type String to store the publication date of the instance ;// dateofacceptance;
 * - refereed of type
 * String to store information abour the review status of the instance. Possible values are 'Unknown',
 * 'nonPeerReviewed', 'peerReviewed'. It corresponds to refereed.classname of the instance to be dumped
 * - articleprocessingcharge of type APC to store the article processing charges possibly associated to the instance
 */
public class Instance implements Serializable {

@@ -28,6 +30,8 @@ public class Instance implements Serializable {

	private String publicationdate;// dateofacceptance;

	private APC articleprocessingcharge;

	private String refereed; // peer-review status

	public String getLicense() {

@@ -78,4 +82,11 @@ public class Instance implements Serializable {
		this.refereed = refereed;
	}

	public APC getArticleprocessingcharge() {
		return articleprocessingcharge;
	}

	public void setArticleprocessingcharge(APC articleprocessingcharge) {
		this.articleprocessingcharge = articleprocessingcharge;
	}
}

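A minimal sketch of how the new field is populated on a dumped instance (the amount and currency values below are invented for illustration; only the setters shown in this commit are used):

```java
// Illustration only: attach an article processing charge to a dumped Instance.
APC apc = new APC();
apc.setCurrency("EUR");   // invented example value
apc.setAmount("2000.00"); // invented example value

Instance instance = new Instance();
instance.setArticleprocessingcharge(apc);
```
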
@@ -19,7 +19,7 @@ import java.io.Serializable;
 */
public class ResearchInitiative implements Serializable {
	private String id; // openaireId
	private String originalId; // context id
	private String acronym; // context id
	private String name; // context name
	private String type; // context type: research initiative or research community
	private String description;

@@ -57,12 +57,12 @@ public class ResearchInitiative implements Serializable {
		this.name = label;
	}

	public String getOriginalId() {
		return originalId;
	public String getAcronym() {
		return acronym;
	}

	public void setOriginalId(String originalId) {
		this.originalId = originalId;
	public void setAcronym(String acronym) {
		this.acronym = acronym;
	}

	public String getDescription() {

@@ -1,8 +1,6 @@

package eu.dnetlib.dhp.schema.oaf;

import eu.dnetlib.dhp.schema.common.ModelSupport;

import static com.google.common.base.Preconditions.checkArgument;

import java.text.ParseException;

@@ -10,6 +8,8 @@ import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import eu.dnetlib.dhp.schema.common.ModelSupport;

/**
 * Relation models any edge between two nodes in the OpenAIRE graph. It has a source id and a target id pointing to
 * graph node identifiers and it is further characterised by the semantic of the link through the fields relType,

@@ -137,7 +137,10 @@ public class Relation extends Oaf {
		try {
			setValidationDate(ModelSupport.oldest(getValidationDate(), r.getValidationDate()));
		} catch (ParseException e) {
			throw new IllegalArgumentException(String.format("invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(), getValidationDate()));
			throw new IllegalArgumentException(String
				.format(
					"invalid validation date format in relation [s:%s, t:%s]: %s", getSource(), getTarget(),
					getValidationDate()));
		}

		super.mergeFrom(r);

@@ -4,6 +4,11 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo;
import static eu.dnetlib.dhp.PropagationConstant.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;

@@ -22,11 +27,6 @@ import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.Relation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

public class PrepareResultInstRepoAssociation {

	private static final Logger log = LoggerFactory.getLogger(PrepareResultInstRepoAssociation.class);

@@ -56,7 +56,8 @@ public class PrepareResultInstRepoAssociation {
		final String alreadyLinkedPath = parser.get("alreadyLinkedPath");
		log.info("alreadyLinkedPath {}: ", alreadyLinkedPath);

		List<String> blacklist = Optional.ofNullable(parser.get("blacklist"))
		List<String> blacklist = Optional
			.ofNullable(parser.get("blacklist"))
			.map(v -> Arrays.asList(v.split(";")))
			.orElse(new ArrayList<>());

@@ -91,14 +92,13 @@ public class PrepareResultInstRepoAssociation {
	private static void prepareDatasourceOrganization(
		SparkSession spark, String datasourceOrganizationPath, List<String> blacklist) {
		String blacklisted = "";
		if(blacklist.size() > 0 ){
		if (blacklist.size() > 0) {
			blacklisted = " AND d.id != '" + blacklist.get(0) + "'";
			for (int i = 1; i < blacklist.size(); i++) {
				blacklisted += " AND d.id != '" + blacklist.get(i) + "'";
			}
		}

		String query = "SELECT source datasourceId, target organizationId "
			+ "FROM ( SELECT id "
			+ "FROM datasource "

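To make the blacklist handling above concrete, here is a small sketch (made-up datasource identifiers, reusing the java.util imports added in this commit) of how a ";"-separated blacklist argument ends up in the SQL filter:

```java
// Made-up datasource ids, for illustration only.
String blacklistArg = "10|openaire____::aaaa;10|openaire____::bbbb";

List<String> blacklist = Optional
	.ofNullable(blacklistArg)
	.map(v -> Arrays.asList(v.split(";")))
	.orElse(new ArrayList<>());

String blacklisted = "";
if (blacklist.size() > 0) {
	blacklisted = " AND d.id != '" + blacklist.get(0) + "'";
	for (int i = 1; i < blacklist.size(); i++) {
		blacklisted += " AND d.id != '" + blacklist.get(i) + "'";
	}
}
// blacklisted -> " AND d.id != '10|openaire____::aaaa' AND d.id != '10|openaire____::bbbb'"
```
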
@@ -24,8 +24,6 @@ public class Constants {

	public static String RESEARCH_INFRASTRUCTURE = "Research Infrastructure/Initiative";

	public static String ORCID = "orcid";

	static {
		accessRightsCoarMap.put("OPEN", "c_abf2");
		accessRightsCoarMap.put("RESTRICTED", "c_16ec");

@@ -37,7 +37,8 @@ public class DumpProducts implements Serializable {
			isSparkSessionManaged,
			spark -> {
				Utils.removeOutputDir(spark, outputPath);
				execDump(spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
				execDump(
					spark, inputPath, outputPath, communityMapPath, inputClazz, outputClazz, dumpType);
			});
	}

@@ -89,7 +90,7 @@ public class DumpProducts implements Serializable {
				return c.getId();
			}
			if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
				return c.getId().substring(0, 3);
				return c.getId().substring(0, c.getId().indexOf("::"));
			}
			return null;
		}).filter(Objects::nonNull).collect(Collectors.toList());

@@ -20,7 +20,7 @@ public class QueryInformationSystem {
	private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
		+
		" where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] " +
		" and ($x//context/param[./@name = 'status']/text() = 'manager' or $x//context/param[./@name = 'status']/text() = 'all') "
		" and ($x//context/param[./@name = 'status']/text() = 'all') "
		+
		" return " +
		"<community> " +

@@ -424,6 +424,19 @@ public class ResultMapper implements Serializable {
					.ifPresent(value -> instance.setType(value.getClassname()));
				Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));

				Optional<Field<String>> oPca = Optional.ofNullable(i.getProcessingchargeamount());
				Optional<Field<String>> oPcc = Optional.ofNullable(i.getProcessingchargecurrency());
				if (oPca.isPresent() && oPcc.isPresent()) {
					Field<String> pca = oPca.get();
					Field<String> pcc = oPcc.get();
					if (!pca.getValue().trim().equals("") && !pcc.getValue().trim().equals("")) {
						APC apc = new APC();
						apc.setCurrency(oPcc.get().getValue());
						apc.setAmount(oPca.get().getValue());
						instance.setArticleprocessingcharge(apc);
					}

				}
			}

	private static List<Provenance> getUniqueProvenance(List<Provenance> provenance) {

@@ -503,7 +516,7 @@ public class ResultMapper implements Serializable {

	private static Pid getOrcid(List<StructuredProperty> p) {
		for (StructuredProperty pid : p) {
			if (pid.getQualifier().getClassid().equals(Constants.ORCID)) {
			if (pid.getQualifier().getClassid().equals(ModelConstants.ORCID)) {
				Optional<DataInfo> di = Optional.ofNullable(pid.getDataInfo());
				if (di.isPresent()) {
					return Pid

@ -1,6 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
|
@ -9,6 +10,7 @@ import org.apache.commons.logging.Log;
|
|||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
|
||||
|
@ -48,15 +50,12 @@ public class SendToZenodoHDFS implements Serializable {
|
|||
.orElse(false);
|
||||
|
||||
final String depositionId = Optional.ofNullable(parser.get("depositionId")).orElse(null);
|
||||
final String communityMapPath = parser.get("communityMapPath");
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
|
||||
CommunityMap communityMap = Utils.readCommunityMap(fileSystem, communityMapPath);
|
||||
|
||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
||||
.listFiles(
|
||||
new Path(hdfsPath), true);
|
||||
|
@ -87,11 +86,6 @@ public class SendToZenodoHDFS implements Serializable {
|
|||
if (!p_string.endsWith("_SUCCESS")) {
|
||||
// String tmp = p_string.substring(0, p_string.lastIndexOf("/"));
|
||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||
log.info("Sending information for community: " + name);
|
||||
if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) {
|
||||
name = communityMap.get(name.substring(0, name.lastIndexOf("."))).replace(" ", "_") + ".tar";
|
||||
}
|
||||
|
||||
FSDataInputStream inputStream = fileSystem.open(p);
|
||||
zenodoApiClient.uploadIS(inputStream, name, fileStatus.getLen());
|
||||
|
||||
|
|
|
@ -34,12 +34,14 @@ public class CommunitySplit implements Serializable {
|
|||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
execSplit(spark, inputPath, outputPath, Utils.getCommunityMap(spark, communityMapPath).keySet());
|
||||
CommunityMap communityMap = Utils.getCommunityMap(spark, communityMapPath);
|
||||
|
||||
execSplit(spark, inputPath, outputPath, communityMap);
|
||||
});
|
||||
}
|
||||
|
||||
private static void execSplit(SparkSession spark, String inputPath, String outputPath,
|
||||
Set<String> communities) {
|
||||
CommunityMap communities) {
|
||||
|
||||
Dataset<CommunityResult> result = Utils
|
||||
.readPath(spark, inputPath + "/publication", CommunityResult.class)
|
||||
|
@ -48,8 +50,9 @@ public class CommunitySplit implements Serializable {
|
|||
.union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class));
|
||||
|
||||
communities
|
||||
.keySet()
|
||||
.stream()
|
||||
.forEach(c -> printResult(c, result, outputPath));
|
||||
.forEach(c -> printResult(c, result, outputPath + "/" + communities.get(c).replace(" ", "_")));
|
||||
|
||||
}
|
||||
|
||||
|
@ -57,16 +60,11 @@ public class CommunitySplit implements Serializable {
|
|||
Dataset<CommunityResult> community_products = result
|
||||
.filter((FilterFunction<CommunityResult>) r -> containsCommunity(r, c));
|
||||
|
||||
try {
|
||||
community_products.first();
|
||||
community_products
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath + "/" + c);
|
||||
} catch (Exception e) {
|
||||
|
||||
}
|
||||
.json(outputPath);
|
||||
|
||||
}
|
||||
|
||||
|
@ -75,9 +73,9 @@ public class CommunitySplit implements Serializable {
|
|||
return r
|
||||
.getContext()
|
||||
.stream()
|
||||
.filter(con -> con.getCode().equals(c))
|
||||
.map(con -> con.getCode())
|
||||
.collect(Collectors.toList())
|
||||
.size() > 0;
|
||||
.contains(c);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
|
||||
public class RemoveCommunities implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(RemoveCommunities.class);
|
||||
private final static ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
private final Configuration conf;
|
||||
private final BufferedWriter writer;
|
||||
private final CommunityMap communityMap;
|
||||
|
||||
public RemoveCommunities(String path, String hdfsNameNode) throws IOException {
|
||||
conf = new Configuration();
|
||||
conf.set("fs.defaultFS", hdfsNameNode);
|
||||
FileSystem fileSystem = FileSystem.get(conf);
|
||||
Path hdfsPath = new Path(path);
|
||||
// FSDataInputStream p = fileSystem.open(hdfsPath);
|
||||
// ObjectMapper mapper = new ObjectMapper();
|
||||
communityMap = OBJECT_MAPPER.readValue((InputStream) fileSystem.open(hdfsPath), CommunityMap.class);
|
||||
FSDataOutputStream fsDataOutputStream = null;
|
||||
if (fileSystem.exists(hdfsPath)) {
|
||||
fileSystem.delete(hdfsPath);
|
||||
}
|
||||
fsDataOutputStream = fileSystem.create(hdfsPath);
|
||||
|
||||
writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
RemoveCommunities.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/input_rc_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
final String nameNode = parser.get("nameNode");
|
||||
log.info("nameNode: {}", nameNode);
|
||||
|
||||
final String outputPath = parser.get("path");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
final String communityId = parser.get("communityId");
|
||||
|
||||
final RemoveCommunities scm = new RemoveCommunities(outputPath, nameNode);
|
||||
|
||||
scm.removeCommunities(communityId);
|
||||
|
||||
}
|
||||
|
||||
private void removeCommunities(String communityId) throws IOException {
|
||||
Set<String> toRemove = communityMap.keySet().stream().map(key -> {
|
||||
if (key.equals(communityId))
|
||||
return null;
|
||||
return key;
|
||||
}).filter(Objects::nonNull).collect(Collectors.toSet());
|
||||
|
||||
toRemove.forEach(key -> communityMap.remove(key));
|
||||
writer.write(OBJECT_MAPPER.writeValueAsString(communityMap));
|
||||
writer.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -70,10 +70,10 @@ public class CreateContextRelation implements Serializable {
|
|||
cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class));
|
||||
|
||||
log.info("Creating relations for projects... ");
|
||||
// cce
|
||||
// .execute(
|
||||
// Process::getRelation, CONTEX_RELATION_PROJECT,
|
||||
// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
|
||||
cce
|
||||
.execute(
|
||||
Process::getRelation, CONTEX_RELATION_PROJECT,
|
||||
ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class));
|
||||
|
||||
cce.close();
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.util.*;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
|
@ -453,6 +454,7 @@ public class DumpGraphEntities implements Serializable {
|
|||
.map(
|
||||
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
|
||||
Encoders.bean(Organization.class))
|
||||
.filter((FilterFunction<Organization>) o -> o != null)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
|
@ -461,6 +463,8 @@ public class DumpGraphEntities implements Serializable {
|
|||
|
||||
private static Organization mapOrganization(eu.dnetlib.dhp.schema.oaf.Organization org) {
|
||||
Organization organization = new Organization();
|
||||
if (org.getDataInfo().getDeletedbyinference())
|
||||
return null;
|
||||
|
||||
Optional
|
||||
.ofNullable(org.getLegalshortname())
|
||||
|
|
|
@ -147,7 +147,7 @@ public class Extractor implements Serializable {
|
|||
.map(
|
||||
paction -> Provenance
|
||||
.newInstance(
|
||||
paction.getClassid(),
|
||||
paction.getClassname(),
|
||||
dinfo.getTrust()))
|
||||
.orElse(
|
||||
Provenance
|
||||
|
|
|
@ -35,7 +35,7 @@ public class Process implements Serializable {
|
|||
ri.setType(Constants.RESEARCH_INFRASTRUCTURE);
|
||||
}
|
||||
ri.setId(Utils.getContextId(ci.getId()));
|
||||
ri.setOriginalId(ci.getId());
|
||||
ri.setAcronym(ci.getId());
|
||||
|
||||
ri.setDescription(ci.getDescription());
|
||||
ri.setName(ci.getName());
|
||||
|
|
|
@ -12,6 +12,7 @@ import org.dom4j.Node;
|
|||
import org.dom4j.io.SAXReader;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||
|
||||
|
@ -113,14 +114,72 @@ public class QueryInformationSystem {
|
|||
@NotNull
|
||||
private List<String> getCategoryList(Element el, String prefix) {
|
||||
List<String> datasourceList = new ArrayList<>();
|
||||
for (Object node : el.selectNodes(".//param")) {
|
||||
Node n = (Node) node;
|
||||
if (n.valueOf("./@name").equals("openaireId")) {
|
||||
datasourceList.add(prefix + "|" + n.getText());
|
||||
}
|
||||
for (Object node : el.selectNodes(".//concept")) {
|
||||
String oid = getOpenaireId((Node) node, prefix);
|
||||
if (oid != null)
|
||||
datasourceList.add(oid);
|
||||
}
|
||||
|
||||
return datasourceList;
|
||||
}
|
||||
|
||||
private String getOpenaireId(Node el, String prefix) {
|
||||
|
||||
for (Object node : el.selectNodes(".//param")) {
|
||||
Node n = (Node) node;
|
||||
if (n.valueOf("./@name").equals("openaireId")) {
|
||||
return prefix + "|" + n.getText();
|
||||
}
|
||||
}
|
||||
return makeOpenaireId(el, prefix);
|
||||
|
||||
}
|
||||
|
||||
private String makeOpenaireId(Node el, String prefix) {
|
||||
String funder = null;
|
||||
String grantId = null;
|
||||
String funding = null;
|
||||
for (Object node : el.selectNodes(".//param")) {
|
||||
Node n = (Node) node;
|
||||
switch (n.valueOf("./@name")) {
|
||||
case "funding":
|
||||
funding = n.getText();
|
||||
break;
|
||||
case "funder":
|
||||
funder = n.getText();
|
||||
break;
|
||||
case "CD_PROJECT_NUMBER":
|
||||
grantId = n.getText();
|
||||
break;
|
||||
}
|
||||
}
|
||||
String nsp = null;
|
||||
switch (funder.toLowerCase()) {
|
||||
case "ec":
|
||||
if (funding == null) {
|
||||
return null;
|
||||
}
|
||||
if (funding.toLowerCase().contains("h2020")) {
|
||||
nsp = "corda__h2020::";
|
||||
} else {
|
||||
nsp = "corda_______::";
|
||||
}
|
||||
break;
|
||||
case "tubitak":
|
||||
nsp = "tubitakf____::";
|
||||
break;
|
||||
case "dfg":
|
||||
nsp = "dfgf________::";
|
||||
break;
|
||||
default:
|
||||
nsp = funder.toLowerCase();
|
||||
for (int i = funder.length(); i < 12; i++)
|
||||
nsp += "_";
|
||||
nsp += "::";
|
||||
}
|
||||
|
||||
return prefix + "|" + nsp + DHPUtils.md5(grantId);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
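As a worked illustration of the id construction above (using the EC/H2020 project parameters that also appear in the CreateRelationTest fixture later in this diff), the funder, funding and grant number params are combined into an OpenAIRE project identifier; the concrete hash is whatever DHPUtils.md5 returns for the grant number:

```java
// Illustration only, with param values taken from the CreateRelationTest fixture below:
//   funder = "EC", funding = "H2020-EINFRA-2015-1", CD_PROJECT_NUMBER = "675858"
// Funder "ec" with a funding string containing "h2020" selects the namespace prefix "corda__h2020::".
String prefix = "40"; // id prefix used for Project entities
String nsp = "corda__h2020::";
String grantId = "675858";
String openaireId = prefix + "|" + nsp + DHPUtils.md5(grantId);
// -> "40|corda__h2020::<md5 of 675858>"; the namespace prefix before "::" is always 12 characters long.
```
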
@ -0,0 +1,137 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.complete;
|
||||
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.Utils;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
/**
|
||||
* It selects the valid relations among those present in the graph. One relation is valid if it is not deletedbyinference
|
||||
* and if both the source and the target node are present in the graph and are not deleted by inference nor invisible.
|
||||
* To check this I made a view of the ids of all the entities in the graph, and select the relations for which a join exists
|
||||
* with this view for both the source and the target
|
||||
*/
|
||||
|
||||
public class SparkSelectValidRelationsJob implements Serializable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(SparkSelectValidRelationsJob.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String jsonConfiguration = IOUtils
|
||||
.toString(
|
||||
SparkSelectValidRelationsJob.class
|
||||
.getResourceAsStream(
|
||||
"/eu/dnetlib/dhp/oa/graph/dump/complete/input_relationdump_parameters.json"));
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||
parser.parseArgument(args);
|
||||
|
||||
Boolean isSparkSessionManaged = Optional
|
||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||
.map(Boolean::valueOf)
|
||||
.orElse(Boolean.TRUE);
|
||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||
|
||||
final String inputPath = parser.get("sourcePath");
|
||||
log.info("inputPath: {}", inputPath);
|
||||
|
||||
final String outputPath = parser.get("outputPath");
|
||||
log.info("outputPath: {}", outputPath);
|
||||
|
||||
SparkConf conf = new SparkConf();
|
||||
|
||||
runWithSparkSession(
|
||||
conf,
|
||||
isSparkSessionManaged,
|
||||
spark -> {
|
||||
Utils.removeOutputDir(spark, outputPath);
|
||||
selectValidRelation(spark, inputPath, outputPath);
|
||||
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static void selectValidRelation(SparkSession spark, String inputPath, String outputPath) {
|
||||
Dataset<Relation> relation = Utils.readPath(spark, inputPath + "/relation", Relation.class);
|
||||
Dataset<Publication> publication = Utils.readPath(spark, inputPath + "/publication", Publication.class);
|
||||
Dataset<eu.dnetlib.dhp.schema.oaf.Dataset> dataset = Utils
|
||||
.readPath(spark, inputPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
|
||||
Dataset<Software> software = Utils.readPath(spark, inputPath + "/software", Software.class);
|
||||
Dataset<OtherResearchProduct> other = Utils
|
||||
.readPath(spark, inputPath + "/otherresearchproduct", OtherResearchProduct.class);
|
||||
Dataset<Organization> organization = Utils.readPath(spark, inputPath + "/organization", Organization.class);
|
||||
Dataset<Project> project = Utils.readPath(spark, inputPath + "/project", Project.class);
|
||||
Dataset<Datasource> datasource = Utils.readPath(spark, inputPath + "/datasource", Datasource.class);
|
||||
|
||||
relation.createOrReplaceTempView("relation");
|
||||
publication.createOrReplaceTempView("publication");
|
||||
dataset.createOrReplaceTempView("dataset");
|
||||
other.createOrReplaceTempView("other");
|
||||
software.createOrReplaceTempView("software");
|
||||
organization.createOrReplaceTempView("organization");
|
||||
project.createOrReplaceTempView("project");
|
||||
datasource.createOrReplaceTempView("datasource");
|
||||
|
||||
spark
|
||||
.sql(
|
||||
"SELECT id " +
|
||||
"FROM publication " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
||||
"UNION ALL " +
|
||||
"SELECT id " +
|
||||
"FROM dataset " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
||||
"UNION ALL " +
|
||||
"SELECT id " +
|
||||
"FROM other " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
||||
"UNION ALL " +
|
||||
"SELECT id " +
|
||||
"FROM software " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
||||
"UNION ALL " +
|
||||
"SELECT id " +
|
||||
"FROM organization " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
||||
"UNION ALL " +
|
||||
"SELECT id " +
|
||||
"FROM project " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false " +
|
||||
"UNION ALL " +
|
||||
"SELECT id " +
|
||||
"FROM datasource " +
|
||||
"WHERE datainfo.deletedbyinference = false AND datainfo.invisible = false ")
|
||||
.createOrReplaceTempView("identifiers");
|
||||
|
||||
spark
|
||||
.sql(
|
||||
"SELECT relation.* " +
|
||||
"FROM relation " +
|
||||
"JOIN identifiers i1 " +
|
||||
"ON source = i1.id " +
|
||||
"JOIN identifiers i2 " +
|
||||
"ON target = i2.id " +
|
||||
"WHERE datainfo.deletedbyinference = false")
|
||||
.as(Encoders.bean(Relation.class))
|
||||
.write()
|
||||
.option("compression", "gzip")
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(outputPath);
|
||||
;
|
||||
|
||||
}
|
||||
}
|
|
@ -102,19 +102,26 @@ public class SparkDumpFunderResults implements Serializable {
|
|||
} else {
|
||||
funderdump = fundernsp.substring(0, fundernsp.indexOf("_")).toUpperCase();
|
||||
}
|
||||
writeFunderResult(funder, result, outputPath + "/" + funderdump);
|
||||
writeFunderResult(funder, result, outputPath, funderdump);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath) {
|
||||
private static void dumpResults(String nsp, Dataset<CommunityResult> results, String outputPath,
|
||||
String funderName) {
|
||||
|
||||
results.map((MapFunction<CommunityResult, CommunityResult>) r -> {
|
||||
if (!Optional.ofNullable(r.getProjects()).isPresent()) {
|
||||
return null;
|
||||
}
|
||||
for (Project p : r.getProjects()) {
|
||||
if (p.getId().startsWith(funder)) {
|
||||
if (p.getId().startsWith(nsp)) {
|
||||
if (nsp.startsWith("40|irb")) {
|
||||
if (p.getFunder().getShortName().equals(funderName))
|
||||
return r;
|
||||
else
|
||||
return null;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
@ -124,7 +131,18 @@ public class SparkDumpFunderResults implements Serializable {
|
|||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.option("compression", "gzip")
|
||||
.json(outputPath);
|
||||
.json(outputPath + "/" + funderName);
|
||||
}
|
||||
|
||||
private static void writeFunderResult(String funder, Dataset<CommunityResult> results, String outputPath,
|
||||
String funderDump) {
|
||||
|
||||
if (funder.startsWith("40|irb")) {
|
||||
dumpResults(funder, results, outputPath, "HRZZ");
|
||||
dumpResults(funder, results, outputPath, "MZOS");
|
||||
} else
|
||||
dumpResults(funder, results, outputPath, funderDump);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,6 +1,18 @@
|
|||
<workflow-app name="dump_community_products" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>singleDeposition</name>
|
||||
<description>Indicates if each file in the directory should be uploaded in a own deposition</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>upload</name>
|
||||
<description>true if the dump should be upload in zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>communityId</name>
|
||||
<description>the id of the community to be dumped if a dump for a single community should be done</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sourcePath</name>
|
||||
<description>the source path</description>
|
||||
|
@ -123,6 +135,24 @@
|
|||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
</java>
|
||||
<ok to="single_deposition"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="single_deposition">
|
||||
<switch>
|
||||
<case to="remove_communities">${wf:conf('singleDeposition') eq true}</case>
|
||||
<default to="fork_dump"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="remove_communities">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.community.RemoveCommunities</main-class>
|
||||
<arg>--path</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--communityId</arg><arg>${communityId}</arg>
|
||||
</java>
|
||||
<ok to="fork_dump"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
@ -405,10 +435,16 @@
|
|||
<arg>--nameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/split</arg>
|
||||
</java>
|
||||
<ok to="send_zenodo"/>
|
||||
<ok to="should_upload"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<decision name="should_upload">
|
||||
<switch>
|
||||
<case to="send_zenodo">${wf:conf('upload') eq true}</case>
|
||||
<default to="End"/>
|
||||
</switch>
|
||||
</decision>
|
||||
<action name="send_zenodo">
|
||||
<java>
|
||||
<main-class>eu.dnetlib.dhp.oa.graph.dump.SendToZenodoHDFS</main-class>
|
||||
|
@ -417,7 +453,6 @@
|
|||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||
|
|
|
@ -148,7 +148,7 @@
|
|||
<path start="dump_organization"/>
|
||||
<path start="dump_project"/>
|
||||
<path start="dump_datasource"/>
|
||||
<path start="dump_relation"/>
|
||||
<path start="select_relation"/>
|
||||
</fork>
|
||||
|
||||
<action name="dump_publication">
|
||||
|
@ -333,6 +333,30 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="select_relation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>Select valid table relation </name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.dump.complete.SparkSelectValidRelationsJob</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--executor-cores=${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/validrelation</arg>
|
||||
</spark>
|
||||
<ok to="dump_relation"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="dump_relation">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
@ -350,7 +374,7 @@
|
|||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${sourcePath}/relation</arg>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/validrelation</arg>
|
||||
<arg>--outputPath</arg><arg>${workingDir}/relation/relation</arg>
|
||||
</spark>
|
||||
<ok to="join_dump"/>
|
||||
|
@ -572,7 +596,6 @@
|
|||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||
|
|
|
@ -1,6 +1,23 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"AccessRight":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "COAR access mode code: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"description": "Label for the access mode"
|
||||
},
|
||||
"scheme": {
|
||||
"type": "string",
|
||||
"description": "Scheme of reference for access right code. Always set to COAR access rights vocabulary: http://vocabularies.coar-repositories.org/documentation/access_rights/"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ControlledField": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -266,6 +283,57 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"instance":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"accessright":{
|
||||
"allOf":[
|
||||
{
|
||||
"$ref":"#/definitions/AccessRight"
|
||||
},
|
||||
{
|
||||
"description":"The accessright of this materialization of the result"
|
||||
}
|
||||
]
|
||||
},
|
||||
"articleprocessingcharge":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"amount":{
|
||||
"type":"string"
|
||||
},
|
||||
"currency":{
|
||||
"type":"string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"license":{
|
||||
"type":"string"
|
||||
},
|
||||
"publicationdate":{
|
||||
"type":"string"
|
||||
},
|
||||
"refereed":{
|
||||
"type":"string"
|
||||
},
|
||||
"type":{
|
||||
"type":"string",
|
||||
"description":"The specific sub-type of this materialization of the result (see https://api.openaire.eu/vocabularies/dnet:result_typologies following the links)"
|
||||
},
|
||||
"url":{
|
||||
"description":"Description of url",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string",
|
||||
"description":"urls where it is possible to access the materialization of the result"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description":"One of the materialization for this result"
|
||||
}
|
||||
},
|
||||
"programmingLanguage": {
|
||||
"type": "string",
|
||||
"description": "Only for results with type 'software': the programming language"
|
||||
|
@ -302,7 +370,7 @@
|
|||
"subject": {
|
||||
"allOf": [
|
||||
{"$ref": "#/definitions/ControlledField"},
|
||||
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."},
|
||||
{"description": "OpenAIRE subject classification scheme (https://api.openaire.eu/vocabularies/dnet:subject_classification_typologies) and value. When the scheme is 'keyword', it means that the subject is free-text (i.e. not a term from a controlled vocabulary)."}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
<workflow-app name="dump_funder_results" xmlns="uri:oozie:workflow:0.5">
|
||||
|
||||
<parameters>
|
||||
<property>
|
||||
<name>upload</name>
|
||||
<value>false</value>
|
||||
<value>true</value>
|
||||
<description>true to upload the dump for the funders in Zenodo</description>
|
||||
</property>
|
||||
<property>
|
||||
|
@ -549,7 +548,6 @@
|
|||
<arg>--accessToken</arg><arg>${accessToken}</arg>
|
||||
<arg>--connectionUrl</arg><arg>${connectionUrl}</arg>
|
||||
<arg>--metadata</arg><arg>${metadata}</arg>
|
||||
<arg>--communityMapPath</arg><arg>${workingDir}/communityMap</arg>
|
||||
<arg>--conceptRecordId</arg><arg>${conceptRecordId}</arg>
|
||||
<arg>--depositionType</arg><arg>${depositionType}</arg>
|
||||
<arg>--depositionId</arg><arg>${depositionId}</arg>
|
||||
|
|
|
@ -35,7 +35,12 @@
|
|||
"paramLongName":"dumpType",
|
||||
"paramDescription": "the type of the dump (complete for the whole graph, community for the products related to communities, funder for the results with at least a link to project",
|
||||
"paramRequired": false
|
||||
}
|
||||
}, {
|
||||
"paramName":"cid",
|
||||
"paramLongName":"communityId",
|
||||
"paramDescription": "the id of the community to be dumped",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
|
||||
[
|
||||
|
||||
{
|
||||
"paramName":"ci",
|
||||
"paramLongName":"communityId",
|
||||
"paramDescription": "URL of the isLookUp Service",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName":"nn",
|
||||
"paramLongName":"nameNode",
|
||||
"paramDescription": "the name node",
|
||||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "p",
|
||||
"paramLongName": "path",
|
||||
"paramDescription": "the path used to store temporary output files",
|
||||
"paramRequired": true
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -25,6 +25,12 @@
|
|||
"paramLongName": "isSparkSessionManaged",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "cid",
|
||||
"paramLongName": "communityId",
|
||||
"paramDescription": "true if the spark session is managed, false otherwise",
|
||||
"paramRequired": false
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
@ -12,12 +12,6 @@
|
|||
"paramDescription": "The id of the concept record for a new version",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"cmp",
|
||||
"paramLongName":"communityMapPath",
|
||||
"paramDescription": "the path to the serialization of the community map",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName":"di",
|
||||
"paramLongName":"depositionId",
|
||||
|
|
|
@ -13,6 +13,7 @@ import org.apache.spark.api.java.JavaRDD;
|
|||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.apache.spark.api.java.function.ForeachFunction;
|
||||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -408,4 +409,53 @@ public class DumpJobTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testArticlePCA() {
|
||||
final String sourcePath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/publication_pca")
|
||||
.getPath();
|
||||
|
||||
final String communityMapPath = getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||
.getPath();
|
||||
|
||||
DumpProducts dump = new DumpProducts();
|
||||
dump
|
||||
.run(
|
||||
// false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||
false, sourcePath, workingDir.toString() + "/result", communityMapPath, Publication.class,
|
||||
GraphResult.class, Constants.DUMPTYPE.COMPLETE.getType());
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<GraphResult> tmp = sc
|
||||
.textFile(workingDir.toString() + "/result")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, GraphResult.class));
|
||||
|
||||
org.apache.spark.sql.Dataset<GraphResult> verificationDataset = spark
|
||||
.createDataset(tmp.rdd(), Encoders.bean(GraphResult.class));
|
||||
|
||||
Assertions.assertEquals(23, verificationDataset.count());
|
||||
// verificationDataset.show(false);
|
||||
|
||||
Assertions.assertEquals(23, verificationDataset.filter("type = 'publication'").count());
|
||||
|
||||
verificationDataset.createOrReplaceTempView("check");
|
||||
|
||||
org.apache.spark.sql.Dataset<Row> temp = spark
|
||||
.sql(
|
||||
"select id " +
|
||||
"from check " +
|
||||
"lateral view explode (instance) i as inst " +
|
||||
"where inst.articleprocessingcharge is not null");
|
||||
|
||||
Assertions.assertTrue(temp.count() == 2);
|
||||
|
||||
Assertions.assertTrue(temp.filter("id = '50|datacite____::05c611fdfc93d7a2a703d1324e28104a'").count() == 1);
|
||||
|
||||
Assertions.assertTrue(temp.filter("id = '50|dedup_wf_001::01e6a28565ca01376b7548e530c6f6e8'").count() == 1);
|
||||
|
||||
//TODO verify value and name of the fields for vocab related value (i.e. accessright, bestaccessright)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@@ -9,7 +9,7 @@ import com.github.victools.jsonschema.generator.*;

import eu.dnetlib.dhp.schema.dump.oaf.graph.*;

@Disabled
//@Disabled
public class GenerateJsonSchema {

	@Test

@@ -21,7 +21,7 @@ public class GenerateJsonSchema {
		configBuilder.forFields().withDescriptionResolver(field -> "Description of " + field.getDeclaredName());
		SchemaGeneratorConfig config = configBuilder.build();
		SchemaGenerator generator = new SchemaGenerator(config);
		JsonNode jsonSchema = generator.generateSchema(Relation.class);
		JsonNode jsonSchema = generator.generateSchema(GraphResult.class);

		System.out.println(jsonSchema.toString());
	}

@ -0,0 +1,60 @@
|
|||
|
||||
package eu.dnetlib.dhp.oa.graph.dump.community;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.oa.graph.dump.MakeTar;
|
||||
import eu.dnetlib.dhp.oa.graph.dump.MakeTarTest;
|
||||
|
||||
public class RemoveCommunityTest {
|
||||
|
||||
private static String workingDir;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(MakeTarTest.class.getSimpleName())
|
||||
.toString();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemove() throws Exception {
|
||||
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
|
||||
|
||||
fs
|
||||
.copyFromLocalFile(
|
||||
false, new Path(getClass()
|
||||
.getResource("/eu/dnetlib/dhp/oa/graph/dump/communityMapPath/communitymap.json")
|
||||
.getPath()),
|
||||
new Path(workingDir + "/communityMap"));
|
||||
|
||||
String path = workingDir + "/communityMap";
|
||||
|
||||
RemoveCommunities.main(new String[] {
|
||||
"-nameNode", "local",
|
||||
"-path", path,
|
||||
"-communityId", "beopen"
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
CommunityMap cm = new ObjectMapper()
|
||||
.readValue(new FileInputStream(workingDir + "/communityMap"), CommunityMap.class);
|
||||
|
||||
Assertions.assertEquals(1, cm.size());
|
||||
Assertions.assertTrue(cm.containsKey("beopen"));
|
||||
|
||||
}
|
||||
}
|
|
@ -97,7 +97,7 @@ public class CreateEntityTest {
|
|||
Assertions.assertEquals(12, riList.size());
|
||||
|
||||
riList.stream().forEach(c -> {
|
||||
switch (c.getOriginalId()) {
|
||||
switch (c.getAcronym()) {
|
||||
case "mes":
|
||||
Assertions
|
||||
.assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY));
|
||||
|
@ -115,9 +115,9 @@ public class CreateEntityTest {
|
|||
String
|
||||
.format(
|
||||
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
|
||||
DHPUtils.md5(c.getOriginalId()))));
|
||||
DHPUtils.md5(c.getAcronym()))));
|
||||
Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes"));
|
||||
Assertions.assertTrue("mes".equals(c.getOriginalId()));
|
||||
Assertions.assertTrue("mes".equals(c.getAcronym()));
|
||||
break;
|
||||
case "clarin":
|
||||
Assertions
|
||||
|
@ -130,9 +130,9 @@ public class CreateEntityTest {
|
|||
String
|
||||
.format(
|
||||
"%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX,
|
||||
DHPUtils.md5(c.getOriginalId()))));
|
||||
DHPUtils.md5(c.getAcronym()))));
|
||||
Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin"));
|
||||
Assertions.assertTrue("clarin".equals(c.getOriginalId()));
|
||||
Assertions.assertTrue("clarin".equals(c.getAcronym()));
|
||||
break;
|
||||
}
|
||||
// TODO add check for all the others Entities
|
||||
|
|
|
@ -9,11 +9,14 @@ import org.junit.jupiter.api.Assertions;
|
|||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||
import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
|
||||
public class CreateRelationTest {
|
||||
|
@ -203,6 +206,7 @@ public class CreateRelationTest {
|
|||
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||
" <param name=\"zenodoCommunity\">oac_ni</param>\n" +
|
||||
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||
" <category claim=\"false\" id=\"ni::projects\" label=\"NI Content providers\"/>\n" +
|
||||
" <category claim=\"false\" id=\"ni::contentproviders\" label=\"NI Content providers\">\n" +
|
||||
" <concept claim=\"false\" id=\"ni::contentproviders::1\" label=\"OpenNeuro\">\n" +
|
||||
" <param name=\"openaireId\">re3data_____::5b9bf9171d92df854cf3c520692e9122</param>\n" +
|
||||
|
@ -376,7 +380,7 @@ public class CreateRelationTest {
|
|||
" <param name=\"rule\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">675858</param>\n" +
|
||||
" <param name=\"url\"/>\n" +
|
||||
" <param name=\"funding\">H2020-EINFRA-2015-1</param>\n" +
|
||||
" <param name=\"funding\">EC | H2020 | RIA</param>\n" +
|
||||
" <param name=\"funder\">EC</param>\n" +
|
||||
" <param name=\"acronym\">West-Life</param>\n" +
|
||||
" </concept>\n" +
|
||||
|
@ -437,7 +441,65 @@ public class CreateRelationTest {
|
|||
" <param name=\"suggestedAcknowledgement\"/>\n" +
|
||||
" <param name=\"zenodoCommunity\">oaa_elixir-gr</param>\n" +
|
||||
" <param name=\"creationdate\">2018-03-01T12:00:00</param>\n" +
|
||||
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\"/>\n" +
|
||||
" <category claim=\"false\" id=\"elixir-gr::projects\" label=\"ELIXIR GR Projects\">\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::12\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">BIO-INFORMATICS RESEARCH NETWORK COORDINATING CENTER (BIRN-CC)</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">1U24RR025736-01</param>\n" +
|
||||
" <param name=\"funder\">NIH</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::13\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">COLLABORATIVE RESEARCH: The Cognitive Neuroscience of Category Learning</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">0223843</param>\n" +
|
||||
" <param name=\"funder\">NSF</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::14\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">The Cognitive Atlas: Developing an Interdisciplinary Knowledge Base Through Socia</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">5R01MH082795-05</param>\n" +
|
||||
" <param name=\"funder\">NIH</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::15\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">Fragmented early life environmental and emotional / cognitive vulnerabilities</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">1P50MH096889-01A1</param>\n" +
|
||||
" <param name=\"funder\">NIH</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::16\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">Enhancement of the 1000 Functional Connectome Project</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">1R03MH096321-01A1</param>\n" +
|
||||
" <param name=\"funder\">TUBITAK</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::17\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">CRCNS Data Sharing: An open data repository for cognitive neuroscience: The OpenfMRI Project</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">1131441</param>\n" +
|
||||
" <param name=\"funder\">NSF</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">Enhancing Human Cortical Plasticity: Visual Psychophysics and fMRI</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">0121950</param>\n" +
|
||||
" <param name=\"funder\">NSF</param>\n" +
|
||||
" </concept>\n" +
|
||||
" <concept claim=\"false\" id=\"ni::projects::18\" label=\"\">\n" +
|
||||
" <param name=\"projectfullname\">Transforming statistical methodology for neuroimaging meta-analysis.</param>\n"
|
||||
+
|
||||
" <param name=\"acronym\"/>\n" +
|
||||
" <param name=\"CD_PROJECT_NUMBER\">100309</param>\n" +
|
||||
" <param name=\"funder\">WT</param>\n" +
|
||||
" </concept>\n" +
|
||||
" </category>" +
|
||||
|
||||
" <category claim=\"false\" id=\"elixir-gr::contentproviders\" label=\"Elixir-GR Content providers\">\n"
|
||||
+
|
||||
" <concept claim=\"false\" id=\"elixir-gr::contentproviders::1\" label=\"bio.tools\">\n" +
|
||||
|
@@ -566,4 +628,98 @@ public class CreateRelationTest {
				tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));

	}

	@Test
	public void test2() {
		List<ContextInfo> cInfoList = new ArrayList<>();
		final Consumer<ContextInfo> consumer = ci -> cInfoList.add(ci);

		queryInformationSystem
			.getContextRelation(consumer, "projects", ModelSupport.getIdPrefix(Project.class));

		cInfoList.forEach(c -> System.out.println(new Gson().toJson(c)));

		List<Relation> rList = new ArrayList<>();

		cInfoList.forEach(cInfo -> Process.getRelation(cInfo).forEach(rList::add));

		Assertions.assertEquals(44, rList.size());

		Assertions
			.assertFalse(
				rList
					.stream()
					.map(r -> r.getSource().getId())
					.collect(Collectors.toSet())
					.contains(
						String
							.format(
								"%s|%s::%s", Constants.CONTEXT_ID,
								Constants.CONTEXT_NS_PREFIX,
								DHPUtils.md5("dh-ch"))));

		Assertions
			.assertEquals(
				2,
				rList
					.stream()
					.filter(
						r -> r
							.getSource()
							.getId()
							.equals(
								String
									.format(
										"%s|%s::%s", Constants.CONTEXT_ID,
										Constants.CONTEXT_NS_PREFIX,
										DHPUtils.md5("clarin"))))
					.collect(Collectors.toList())
					.size());

		Assertions
			.assertEquals(
				2,
				rList
					.stream()
					.filter(
						r -> r
							.getTarget()
							.getId()
							.equals(
								String
									.format(
										"%s|%s::%s", Constants.CONTEXT_ID,
										Constants.CONTEXT_NS_PREFIX,
										DHPUtils.md5("clarin"))))
					.collect(Collectors.toList())
					.size());

		Set<String> tmp = rList
			.stream()
			.filter(
				r -> r
					.getSource()
					.getId()
					.equals(
						String
							.format(
								"%s|%s::%s", Constants.CONTEXT_ID,
								Constants.CONTEXT_NS_PREFIX,
								DHPUtils.md5("clarin"))))
			.map(r -> r.getTarget().getId())
			.collect(Collectors.toSet());

		Assertions
			.assertTrue(
				tmp.contains("40|corda__h2020::b5a4eb56bf84bef2ebc193306b4d423f") &&
					tmp.contains("40|corda_______::ef782b2d85676aa3e5a907427feb18c4"));

		rList.forEach(rel -> {
			if (rel.getSource().getId().startsWith("40|")) {
				String proj = rel.getSource().getId().substring(3);
				Assertions.assertTrue(proj.substring(0, proj.indexOf("::")).length() == 12);
			}
		});

	}
}

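The source identifiers asserted in test2 are built as contextId|nsPrefix::md5(community code), with the MD5 of the community short name (for example clarin or dh-ch) as the suffix. The following is a minimal, runnable sketch of that composition; the two placeholder constants stand in for Constants.CONTEXT_ID and Constants.CONTEXT_NS_PREFIX, whose real values are not shown in this diff, and the class name is hypothetical.

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class ContextIdSketch {

	// hex-encoded MD5, the same kind of digest DHPUtils.md5 is expected to produce
	static String md5(String s) throws Exception {
		MessageDigest md = MessageDigest.getInstance("MD5");
		StringBuilder sb = new StringBuilder();
		for (byte b : md.digest(s.getBytes(StandardCharsets.UTF_8))) {
			sb.append(String.format("%02x", b & 0xff));
		}
		return sb.toString();
	}

	public static void main(String[] args) throws Exception {
		String contextId = "00"; // placeholder for Constants.CONTEXT_ID
		String nsPrefix = "context_____"; // placeholder for Constants.CONTEXT_NS_PREFIX

		// prints something like 00|context_____::<md5 of "clarin">
		System.out.println(String.format("%s|%s::%s", contextId, nsPrefix, md5("clarin")));
	}
}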
@@ -88,7 +88,7 @@ public class DumpOrganizationProjectDatasourceTest {
		org.apache.spark.sql.Dataset<eu.dnetlib.dhp.schema.dump.oaf.graph.Organization> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Organization.class));

		Assertions.assertEquals(34, verificationDataset.count());
		Assertions.assertEquals(15, verificationDataset.count());

		verificationDataset
			.foreach(

@@ -0,0 +1,100 @@

package eu.dnetlib.dhp.oa.graph.dump.complete;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;

import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.Relation;

public class SelectRelationTest {

	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

	private static SparkSession spark;

	private static Path workingDir;

	private static final Logger log = LoggerFactory
		.getLogger(SelectRelationTest.class);

	private static HashMap<String, String> map = new HashMap<>();

	@BeforeAll
	public static void beforeAll() throws IOException {
		workingDir = Files
			.createTempDirectory(SelectRelationTest.class.getSimpleName());
		log.info("using work dir {}", workingDir);

		SparkConf conf = new SparkConf();
		conf.setAppName(SelectRelationTest.class.getSimpleName());

		conf.setMaster("local[*]");
		conf.set("spark.driver.host", "localhost");
		conf.set("hive.metastore.local", "true");
		conf.set("spark.ui.enabled", "false");
		conf.set("spark.sql.warehouse.dir", workingDir.toString());
		conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString());

		spark = SparkSession
			.builder()
			.appName(SelectRelationTest.class.getSimpleName())
			.config(conf)
			.getOrCreate();
	}

	@AfterAll
	public static void afterAll() throws IOException {
		FileUtils.deleteDirectory(workingDir.toFile());
		spark.stop();
	}

	@Test
	public void test1() throws Exception {

		final String sourcePath = getClass()
			.getResource("/eu/dnetlib/dhp/oa/graph/dump/selectrelations")
			.getPath();

		SparkSelectValidRelationsJob.main(new String[] {
			"-isSparkSessionManaged", Boolean.FALSE.toString(),
			"-outputPath", workingDir.toString() + "/relation",
			"-sourcePath", sourcePath
		});

		// dumpCommunityProducts.exec(MOCK_IS_LOOK_UP_URL,Boolean.FALSE, workingDir.toString()+"/dataset",sourcePath,"eu.dnetlib.dhp.schema.oaf.Dataset","eu.dnetlib.dhp.schema.dump.oaf.Dataset");

		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());

		JavaRDD<eu.dnetlib.dhp.schema.oaf.Relation> tmp = sc
			.textFile(workingDir.toString() + "/relation")
			.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.schema.oaf.Relation.class));

		org.apache.spark.sql.Dataset<Relation> verificationDataset = spark
			.createDataset(tmp.rdd(), Encoders.bean(eu.dnetlib.dhp.schema.oaf.Relation.class));

		Assertions.assertEquals(7, verificationDataset.count());

	}

}

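SelectRelationTest runs SparkSelectValidRelationsJob over the relation fixture (the 15-line selectrelations resource further below) and expects only 7 relations to survive; the fixture mixes relations that are deleted by inference, invisible, or that point to identifiers outside the dumped entity set. The following is a minimal sketch of the selection idea the test appears to exercise, keeping only relations whose endpoints are both known; it assumes getSource()/getTarget() return identifier strings as in eu.dnetlib.dhp.schema.oaf.Relation and is an illustration with a hypothetical class name, not the job's actual Spark implementation.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import eu.dnetlib.dhp.schema.oaf.Relation;

public class ValidRelationSketch {

	// validIds stands in for the identifiers of the entities that actually make it into
	// the dump (publications, datasets, organizations, projects, datasources, ...)
	public static List<Relation> selectValid(List<Relation> relations, Set<String> validIds) {
		return relations
			.stream()
			.filter(r -> validIds.contains(r.getSource()) && validIds.contains(r.getTarget()))
			.collect(Collectors.toList());
	}
}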
@@ -137,5 +137,10 @@ public class SplitPerFunderTest {
			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
		Assertions.assertEquals(3, tmp.count());

		// MZOS 1
		tmp = sc
			.textFile(workingDir.toString() + "/split/MZOS")
			.map(item -> OBJECT_MAPPER.readValue(item, CommunityResult.class));
		Assertions.assertEquals(1, tmp.count());
	}
}

File diff suppressed because one or more lines are too long
@@ -6,3 +6,4 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::9f3ff882f023209d9ffb4dc32b77d376","subRelType":"provision","target":"40|corda_______::ffc1811633b3222e4764c7b0517f83e8"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"40|nhmrc_______::4e6c928fef9851b37ec73f4f6daca35b"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"40|corda__h2020::846b777af165fef7c904a81712a83b66"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1603715146539,"properties":[],"relClass":"isProducedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"40|irb_hr______::1e5e62235d094afd01cd56e65112fc63"}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,15 @@
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f","subRelType":"provision","target":"20|dedup_wf_001::2899e571609779168222fdeb59cb916d"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::5ac587eb28411c351c2e357eb097fd3d","subRelType":"provision","target":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"10|doajarticles::690b3aaf177a4c70b81bacd8d023cbdc","subRelType":"provision","target":"20|doajarticles::396262ee936f3d3e26ff0e60bea6cae0"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":true,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::b566fa319c3923454e1e8eb886ab62d2","subRelType":"provision","target":"20|dedup_wf_001::4e6c928fef9851b37ec73f4f6daca35b"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::e0554fb004a155bc23cfb43ee9fc8eae","subRelType":"provision","target":"20|grid________::b91f67a34df55a0aa1aabdcb3700f413"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::fbf7592ddbf2ad3cc0ed70c0f2e1d67c","subRelType":"provision","target":"20|dedup_wf_001::1b965e2c0c53e5526d269d63bcfa0ae6"}
{"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|doajarticles::fd4c399077127f0ba09b5205e2b78406","subRelType":"provision","target":"50|doajarticles::1cae0b82b56ccd97c2db1f698def7074"}
{"collectedfrom":[{"key":"10|infrastruct_::f66f1bd369679b5b077dcdf006089556","value":"OpenAIRE"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1594398578323,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|openaire____::8f991165fae922e29ad55d592f568464","subRelType":"provision","target":"50|openaire____::ec653e804967133b9436fdd30d3ff51d"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::15231a7ce4ba789d13b722cc5c955834","subRelType":"provision","target":"50|dedup_wf_001::1ea4bcb1bae8c6befef1e7f1230f0f10"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::16d11e9595188dbad0418a85f0351aba","subRelType":"provision","target":"40|opendoar____::041abd8c990fc531ab9bd2674a0e2725"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::46d3f6029f6170ebccb28945964d09bf","subRelType":"provision","target":"40|opendoar____::a5fcb8eb25ebd6f7cd219e0fa1e6ddc1"}
{"collectedfrom":[{"key":"10|openaire____::47ce9e9f4fad46e732cff06419ecaabb","value":"OpenDOAR"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isProvidedBy","relType":"datasourceOrganization","source":"50|opendoar____::7501e5d4da87ac39d782741cd794002d","subRelType":"provision","target":"40|dedup_wf_001::04e2c34ef4daa411ff2497afc807b612"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":true,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::0cd5965141113df5739f1ac7ac7f6d37","subRelType":"participation","target":"40|aka_________::1bc716a1763110da3eb1af867de718a8"}
{"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900000000000000022"},"lastupdatetimestamp":1592688952862,"properties":[],"relClass":"isParticipant","relType":"projectOrganization","source":"20|aka_________::1e2df822bf0932ad0f77565789f22e17","subRelType":"participation","target":"40|aka_________::a6c805bcfd383bae043d8df38e79db78"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"iis::document_affiliations","inferred":true,"invisible":false,"provenanceaction":{"classid":"iis","classname":"Inferred by OpenAIRE","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.8966"},"lastupdatetimestamp":1595258695262,"properties":[],"relClass":"isAuthorInstitutionOf","relType":"resultOrganization","source":"20|aka_________::2c3aab6bce7516338b4dbfb4f6f86db7","subRelType":"affiliation","target":"40|dedup_wf_001::02859c30f6c8bfbdd8c427068a6ec684"}
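Each line above is one eu.dnetlib.dhp.schema.oaf.Relation serialized as JSON; the dataInfo flags (deletedbyinference, invisible) and the source/target identifier prefixes (10|, 20|, 40|, 50|) are what distinguish relations that can be kept for the dump from those that are discarded. A short Jackson sketch of reading one such line follows; the literal is a trimmed-down version of the first fixture row, keeping only the fields used here, and the class name is hypothetical.

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.Relation;

public class RelationJsonSketch {

	public static void main(String[] args) throws Exception {
		// trimmed-down record in the same shape as the fixture lines above
		String json = "{\"dataInfo\":{\"deletedbyinference\":false,\"invisible\":false},"
			+ "\"relClass\":\"isProvidedBy\",\"relType\":\"datasourceOrganization\","
			+ "\"source\":\"10|doajarticles::2baa9032dc058d3c8ff780c426b0c19f\","
			+ "\"target\":\"20|dedup_wf_001::2899e571609779168222fdeb59cb916d\"}";

		Relation r = new ObjectMapper().readValue(json, Relation.class);

		// print the relation and the flag the selection step relies on
		System.out.println(r.getSource() + " --" + r.getRelClass() + "--> " + r.getTarget());
		System.out.println("deleted by inference: " + r.getDataInfo().getDeletedbyinference());
	}
}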
File diff suppressed because one or more lines are too long