forked from D-Net/dnet-hadoop

fixed issues while running on cluster
commit f96ca900e1, parent 56e70573c2
CommunityMap.java
@ -1,7 +1,8 @@
The class now implements Serializable (new java.io.Serializable import plus the implements clause); nothing else changes.

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.Serializable;
import java.util.HashMap;

public class CommunityMap extends HashMap<String, String> implements Serializable {
}
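The dump drivers further down deserialize this map from a JSON job parameter with Gson (new Gson().fromJson(cm.get(), CommunityMap.class)). A minimal round-trip sketch, assuming only that community ids map to display labels (the entry below is a made-up example, not from the repository):

package eu.dnetlib.dhp.oa.graph.dump;

import com.google.gson.Gson;

public class CommunityMapExample {
    public static void main(String[] args) {
        CommunityMap map = new CommunityMap();
        map.put("egi", "EGI Federation"); // hypothetical entry, for illustration only
        String json = new Gson().toJson(map); // e.g. {"egi":"EGI Federation"}
        CommunityMap parsed = new Gson().fromJson(json, CommunityMap.class);
        System.out.println(parsed.get("egi"));
    }
}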
Constants.java
@ -1,18 +1,29 @@
accessRightsCoarMap now stores bare COAR codes instead of full purl.org URIs (e.g. "OPEN" -> "c_abf2" rather than "http://purl.org/coar/access_right/c_abf2", and the stray "//metadataonly for coar" note that had leaked into the CLOSED value is gone); a reverse coarCodeLabelMap and the COAR_ACCESS_RIGHT_SCHEMA constant are added.

package eu.dnetlib.dhp.oa.graph.dump;

import java.util.Map;

import com.google.common.collect.Maps;

public class Constants {

    public static final Map<String, String> accessRightsCoarMap = Maps.newHashMap();
    public static final Map<String, String> coarCodeLabelMap = Maps.newHashMap();

    public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";

    static {
        accessRightsCoarMap.put("OPEN", "c_abf2");
        accessRightsCoarMap.put("RESTRICTED", "c_16ec");
        accessRightsCoarMap.put("OPEN SOURCE", "c_abf2");
        accessRightsCoarMap.put("CLOSED", "c_14cb");
        accessRightsCoarMap.put("EMBARGO", "c_f1cf");
    }

    static {
        coarCodeLabelMap.put("c_abf2", "OPEN");
        coarCodeLabelMap.put("c_16ec", "RESTRICTED");
        coarCodeLabelMap.put("c_14cb", "CLOSED");
        coarCodeLabelMap.put("c_f1cf", "EMBARGO");
    }
}
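A minimal sketch of how the two maps and the schema constant compose, following the AccessRight.newInstance(code, label, scheme) pattern used in Mapper below (a sketch, not project code; it assumes the dump schema's AccessRight factory as shown there):

package eu.dnetlib.dhp.oa.graph.dump;

import eu.dnetlib.dhp.schema.dump.oaf.AccessRight;

public class CoarAccessRightSketch {

    // Resolves a graph access-right classid ("OPEN", "EMBARGO", ...) to the COAR triple, or null if unmapped.
    public static AccessRight toCoarAccessRight(String classid) {
        if (!Constants.accessRightsCoarMap.containsKey(classid)) {
            return null; // UNKNOWN / OTHER classids are deliberately not mapped
        }
        String code = Constants.accessRightsCoarMap.get(classid); // e.g. "OPEN" -> "c_abf2"
        return AccessRight
            .newInstance(code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA);
    }
}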
Mapper.java
@ -1,223 +1,407 @@
The map method is rewritten defensively: every nullable graph field is wrapped in Optional.ofNullable(...) before it is copied (the previous version dereferenced getters directly), the result-type switch is guarded and its "otherresearchproduct" case becomes "other", access rights are resolved through the new Constants maps (COAR code, label and schema instead of the raw classid/classname), community context ids are accepted both as plain ids and as "id::subcommunity" prefixes, context provenance now records the provenance-action classname rather than the classid, and author conversion is extracted into a getAuthor helper. Reconstructed new version:

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import javax.swing.text.html.Option;

import org.apache.avro.generic.GenericData;

import eu.dnetlib.dhp.schema.dump.oaf.*;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

public class Mapper implements Serializable {

    public static <I extends eu.dnetlib.dhp.schema.oaf.Result, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> O map(
        I input, Map<String, String> communityMap) {

        O out = null;
        Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> ort = Optional.ofNullable(input.getResulttype());
        if (ort.isPresent()) {
            switch (ort.get().getClassid()) {
                case "publication":
                    out = (O) new Publication();
                    Optional<Journal> journal = Optional
                        .ofNullable(((eu.dnetlib.dhp.schema.oaf.Publication) input).getJournal());
                    if (journal.isPresent()) {
                        Journal j = journal.get();
                        Container c = new Container();
                        c.setConferencedate(j.getConferencedate());
                        c.setConferenceplace(j.getConferenceplace());
                        c.setEdition(j.getEdition());
                        c.setEp(j.getEp());
                        c.setIss(j.getIss());
                        c.setIssnLinking(j.getIssnLinking());
                        c.setIssnOnline(j.getIssnOnline());
                        c.setIssnPrinted(j.getIssnPrinted());
                        c.setName(j.getName());
                        c.setSp(j.getSp());
                        c.setVol(j.getVol());
                        out.setContainer(c);
                    }
                    break;
                case "dataset":
                    Dataset d = new Dataset();
                    eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
                    Optional.ofNullable(id.getSize()).ifPresent(v -> d.setSize(v.getValue()));
                    Optional.ofNullable(id.getVersion()).ifPresent(v -> d.setVersion(v.getValue()));
                    d.setGeolocation(
                        Optional.ofNullable(id.getGeolocation())
                            .map(igl -> igl.stream().filter(Objects::nonNull).map(gli -> {
                                GeoLocation gl = new GeoLocation();
                                gl.setBox(gli.getBox());
                                gl.setPlace(gli.getPlace());
                                gl.setPoint(gli.getPoint());
                                return gl;
                            }).collect(Collectors.toList()))
                            .orElse(null));
                    out = (O) d;
                    break;
                case "software":
                    Software s = new Software();
                    eu.dnetlib.dhp.schema.oaf.Software is = (eu.dnetlib.dhp.schema.oaf.Software) input;
                    Optional.ofNullable(is.getCodeRepositoryUrl())
                        .ifPresent(value -> s.setCodeRepositoryUrl(value.getValue()));
                    Optional.ofNullable(is.getDocumentationUrl())
                        .ifPresent(value -> s.setDocumentationUrl(
                            value.stream().map(v -> v.getValue()).collect(Collectors.toList())));
                    Optional.ofNullable(is.getProgrammingLanguage())
                        .ifPresent(value -> s.setProgrammingLanguage(value.getClassid()));
                    out = (O) s;
                    break;
                case "other":
                    OtherResearchProduct or = new OtherResearchProduct();
                    eu.dnetlib.dhp.schema.oaf.OtherResearchProduct ir = (eu.dnetlib.dhp.schema.oaf.OtherResearchProduct) input;
                    or.setContactgroup(Optional.ofNullable(ir.getContactgroup())
                        .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
                        .orElse(null));
                    or.setContactperson(Optional.ofNullable(ir.getContactperson())
                        .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
                        .orElse(null));
                    or.setTool(Optional.ofNullable(ir.getTool())
                        .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
                        .orElse(null));
                    out = (O) or;
                    break;
            }

            Optional<List<eu.dnetlib.dhp.schema.oaf.Author>> oAuthor = Optional.ofNullable(input.getAuthor());
            if (oAuthor.isPresent()) {
                out.setAuthor(oAuthor.get().stream().map(oa -> getAuthor(oa)).collect(Collectors.toList()));
            }

            // I do not map Access Right UNKNOWN or OTHER
            Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oar = Optional.ofNullable(input.getBestaccessright());
            if (oar.isPresent() && Constants.accessRightsCoarMap.containsKey(oar.get().getClassid())) {
                String code = Constants.accessRightsCoarMap.get(oar.get().getClassid());
                out.setBestaccessright(
                    AccessRight.newInstance(code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA));
            }

            out.setCollectedfrom(input.getCollectedfrom().stream()
                .map(cf -> KeyValue.newInstance(cf.getKey(), cf.getValue()))
                .collect(Collectors.toList()));

            Set<String> communities = communityMap.keySet();
            List<Context> contextList = input.getContext().stream().map(c -> {
                if (communities.contains(c.getId())
                    || communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
                    Context context = new Context();
                    if (!communityMap.containsKey(c.getId())) {
                        context.setCode(c.getId().substring(0, c.getId().indexOf("::")));
                        context.setLabel(communityMap.get(context.getCode()));
                    } else {
                        context.setCode(c.getId());
                        context.setLabel(communityMap.get(c.getId()));
                    }
                    Optional<List<DataInfo>> dataInfo = Optional.ofNullable(c.getDataInfo());
                    if (dataInfo.isPresent()) {
                        List<String> provenance = new ArrayList<>();
                        provenance.addAll(dataInfo.get().stream()
                            .map(di -> di.getInferred() ? di.getProvenanceaction().getClassname() : null)
                            .filter(Objects::nonNull)
                            .collect(Collectors.toSet()));
                        context.setProvenance(provenance);
                    }
                    return context;
                }
                return null;
            }).filter(Objects::nonNull).collect(Collectors.toList());
            if (contextList.size() > 0) {
                out.setContext(contextList);
            }

            final List<String> contributorList = new ArrayList<>();
            Optional.ofNullable(input.getContributor())
                .ifPresent(value -> value.stream().forEach(c -> contributorList.add(c.getValue())));
            out.setContributor(contributorList);

            List<Country> countryList = new ArrayList<>();
            Optional.ofNullable(input.getCountry())
                .ifPresent(value -> value.stream().forEach(c -> {
                    Country country = new Country();
                    country.setCode(c.getClassid());
                    country.setLabel(c.getClassname());
                    Optional.ofNullable(c.getDataInfo())
                        .ifPresent(provenance -> country.setProvenance(provenance.getProvenanceaction().getClassname()));
                    countryList.add(country);
                }));
            out.setCountry(countryList);

            final List<String> coverageList = new ArrayList<>();
            Optional.ofNullable(input.getCoverage())
                .ifPresent(value -> value.stream().forEach(c -> coverageList.add(c.getValue())));
            out.setCoverage(coverageList);

            out.setDateofcollection(input.getDateofcollection());

            final List<String> descriptionList = new ArrayList<>();
            Optional.ofNullable(input.getDescription())
                .ifPresent(value -> value.stream().forEach(d -> descriptionList.add(d.getValue())));
            out.setDescription(descriptionList);

            Optional<Field<String>> oStr = Optional.ofNullable(input.getEmbargoenddate());
            if (oStr.isPresent()) {
                out.setEmbargoenddate(oStr.get().getValue());
            }

            final List<String> formatList = new ArrayList<>();
            Optional.ofNullable(input.getFormat())
                .ifPresent(value -> value.stream().forEach(f -> formatList.add(f.getValue())));
            out.setFormat(formatList);

            out.setId(input.getId());
            out.setOriginalId(input.getOriginalId());

            final List<Instance> instanceList = new ArrayList<>();
            Optional.ofNullable(input.getInstance())
                .ifPresent(inst -> inst.stream().forEach(i -> {
                    Instance instance = new Instance();
                    Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> opAr = Optional.ofNullable(i.getAccessright());
                    if (opAr.isPresent() && Constants.accessRightsCoarMap.containsKey(opAr.get().getClassid())) {
                        String code = Constants.accessRightsCoarMap.get(opAr.get().getClassid());
                        instance.setAccessright(
                            AccessRight.newInstance(code, Constants.coarCodeLabelMap.get(code), Constants.COAR_ACCESS_RIGHT_SCHEMA));
                    }
                    instance.setCollectedfrom(
                        KeyValue.newInstance(i.getCollectedfrom().getKey(), i.getCollectedfrom().getValue()));
                    instance.setHostedby(
                        KeyValue.newInstance(i.getHostedby().getKey(), i.getHostedby().getValue()));
                    Optional.ofNullable(i.getLicense()).ifPresent(value -> instance.setLicense(value.getValue()));
                    Optional.ofNullable(i.getDateofacceptance()).ifPresent(value -> instance.setPublicationdate(value.getValue()));
                    Optional.ofNullable(i.getRefereed()).ifPresent(value -> instance.setRefereed(value.getValue()));
                    Optional.ofNullable(i.getInstancetype()).ifPresent(value -> instance.setType(value.getClassname()));
                    Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value));
                    instanceList.add(instance);
                }));
            out.setInstance(instanceList);

            Optional<eu.dnetlib.dhp.schema.oaf.Qualifier> oL = Optional.ofNullable(input.getLanguage());
            if (oL.isPresent()) {
                eu.dnetlib.dhp.schema.oaf.Qualifier language = oL.get();
                out.setLanguage(Qualifier.newInstance(language.getClassid(), language.getClassname()));
            }

            Optional<Long> oLong = Optional.ofNullable(input.getLastupdatetimestamp());
            if (oLong.isPresent()) {
                out.setLastupdatetimestamp(oLong.get());
            }

            Optional<List<StructuredProperty>> otitle = Optional.ofNullable(input.getTitle());
            if (otitle.isPresent()) {
                List<StructuredProperty> iTitle = otitle.get().stream()
                    .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
                    .collect(Collectors.toList());
                if (iTitle.size() > 0) {
                    out.setMaintitle(iTitle.get(0).getValue());
                }
                iTitle = otitle.get().stream()
                    .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
                    .collect(Collectors.toList());
                if (iTitle.size() > 0) {
                    out.setSubtitle(iTitle.get(0).getValue());
                }
            }

            List<ControlledField> pids = new ArrayList<>();
            Optional.ofNullable(input.getPid())
                .ifPresent(value -> value.stream()
                    .forEach(p -> pids.add(ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))));
            out.setPid(pids);

            oStr = Optional.ofNullable(input.getDateofacceptance());
            if (oStr.isPresent()) {
                out.setPublicationdate(oStr.get().getValue());
            }
            oStr = Optional.ofNullable(input.getPublisher());
            if (oStr.isPresent()) {
                out.setPublisher(oStr.get().getValue());
            }

            List<String> sourceList = new ArrayList<>();
            Optional.ofNullable(input.getSource())
                .ifPresent(value -> value.stream().forEach(s -> sourceList.add(s.getValue())));
            // out.setSource(input.getSource().stream().map(s -> s.getValue()).collect(Collectors.toList()));

            List<ControlledField> subjectList = new ArrayList<>();
            Optional.ofNullable(input.getSubject())
                .ifPresent(value -> value.stream()
                    .forEach(s -> subjectList.add(ControlledField.newInstance(s.getQualifier().getClassid(), s.getValue()))));
            out.setSubject(subjectList);

            out.setType(input.getResulttype().getClassid());
        }

        return out;
    }

    private static Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
        Author a = new Author();
        Optional.ofNullable(oa.getAffiliation())
            .ifPresent(value -> a.setAffiliation(value.stream().map(aff -> aff.getValue()).collect(Collectors.toList())));
        a.setFullname(oa.getFullname());
        a.setName(oa.getName());
        a.setSurname(oa.getSurname());
        a.setRank(oa.getRank());
        Optional.ofNullable(oa.getPid())
            .ifPresent(value -> a.setPid(value.stream()
                .map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
                .collect(Collectors.toList())));
        return a;
    }
}
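The fix above is one recurring null-guard pattern applied field by field. A standalone sketch of its two shapes, scalar and list, using a stand-in Field class rather than the project schema (plain Java, illustration only):

import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

public class NullSafeCopy {

    // Stand-in for eu.dnetlib.dhp.schema.oaf.Field<String>.
    static class Field {
        private final String value;
        Field(String v) { this.value = v; }
        String getValue() { return value; }
    }

    // Scalar: copy the value only if the field is present.
    static String copyScalar(Field f) {
        return Optional.ofNullable(f).map(Field::getValue).orElse(null);
    }

    // List: map each element, or fall back to null when the whole list is missing.
    static List<String> copyList(List<Field> fields) {
        return Optional.ofNullable(fields)
            .map(l -> l.stream().map(Field::getValue).collect(Collectors.toList()))
            .orElse(null);
    }

    public static void main(String[] args) {
        System.out.println(copyScalar(null)); // null instead of a NullPointerException
        System.out.println(copyList(Arrays.asList(new Field("a"), new Field("b"))));
    }
}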
QueryInformationSystem.java
Both getCommunityMap() and the (now public) static getMap(List<String>) return the CommunityMap type instead of a raw Map<String, String>/HashMap, the imports are regrouped, and the leftover set(String isLookUpUrl)/getter stubs are removed. Reconstructed new version of the shown hunks:

@ -1,34 +1,32 @@
package eu.dnetlib.dhp.oa.graph.dump;

import java.io.StringReader;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class QueryInformationSystem {

    private ISLookUpService isLookUp;

    private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "
        + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri'] "
        + " return "
        + "<community> "
        + "{$x//CONFIGURATION/context/@id}"
        + "{$x//CONFIGURATION/context/@label}"
        + "</community>";

    public CommunityMap getCommunityMap()
        throws ISLookUpException {
        return getMap(isLookUp.quickSearchProfile(XQUERY));
    }

@ -42,12 +40,8 @@ public class QueryInformationSystem {
        this.isLookUp = isLookUpService;
    }

    public static CommunityMap getMap(List<String> communityMap) {
        final CommunityMap map = new CommunityMap();

        communityMap.stream().forEach(xml -> {
            final Document doc;

@ -59,7 +53,6 @@
                e.printStackTrace();
            }
        });

        return map;
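The body of the forEach lambda between the last two hunks is not part of this diff. A hedged sketch of what parsing one <community> record produced by the XQUERY above could look like with dom4j (the id/label attribute layout follows from the query, but the exact handling in the repository is an assumption):

            // Sketch only: parse one "<community id='...' label='...'/>"-style record into the map.
            try {
                Document parsed = new SAXReader().read(new StringReader(xml));
                Element root = parsed.getRootElement();
                map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); // assumed attribute layout
            } catch (DocumentException e) {
                e.printStackTrace(); // the real method also just prints the stack trace
            }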
ResultProject.java
@ -1,27 +1,28 @@
Only the import order changes (the eu.dnetlib.dhp.schema.dump.oaf.Projects import moves below the java.* imports); the bean itself is untouched.

package eu.dnetlib.dhp.oa.graph.dump;

import java.io.Serializable;
import java.util.List;

import eu.dnetlib.dhp.schema.dump.oaf.Projects;

public class ResultProject implements Serializable {
    private String resultId;
    private List<Projects> projectsList;

    public String getResultId() {
        return resultId;
    }

    public void setResultId(String resultId) {
        this.resultId = resultId;
    }

    public List<Projects> getProjectsList() {
        return projectsList;
    }

    public void setProjectsList(List<Projects> projectsList) {
        this.projectsList = projectsList;
    }
}
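A brief sketch of how SparkPrepareResultProject (below) fills this bean, using the Projects.newInstance(id, code, acronym, title, funder) factory it calls; all identifier values here are placeholders, not repository data:

        // Sketch only: one result linked to one project; acronym/title/funder may be null.
        ResultProject rp = new ResultProject();
        rp.setResultId("50|doi_________::abc");                        // hypothetical result identifier
        Projects ps = Projects.newInstance("40|corda__h2020::xyz",     // hypothetical project identifier
            "101003374", null, null, null);                            // hypothetical project code
        rp.setProjectsList(new ArrayList<>(Arrays.asList(ps)));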
SparkDumpCommunityProducts.java
The driver is reworked: the previous instance state (a QueryInformationSystem getter/setter, an exec(...) instance method, the unused resultType parameter) is replaced by static execDump/execMap methods; the community map is either queried from the IS via ISLookUp or, when the isLookUpUrl is still the BASEURL placeholder, deserialized from the communityMap parameter with Gson; records whose contexts match no dumped community are filtered out before writing gzip-compressed JSON. Reconstructed new version of the shown hunks:

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

@ -6,10 +7,8 @@ import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

import javax.management.Query;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;

@ -19,127 +18,129 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.Gson;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.oaf.Context;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class SparkDumpCommunityProducts implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(SparkDumpCommunityProducts.class);
    private static QueryInformationSystem queryInformationSystem;

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkDumpCommunityProducts.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/input_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        final String dumpClassName = parser.get("dumpTableName");
        log.info("dumpClassName: {}", dumpClassName);

        final String isLookUpUrl = parser.get("isLookUpUrl");
        log.info("isLookUpUrl: {}", isLookUpUrl);

        // final String resultType = parser.get("resultType");
        // log.info("resultType: {}", resultType);

        final Optional<String> cm = Optional.ofNullable(parser.get("communityMap"));

        Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);
        Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result> dumpClazz = (Class<? extends eu.dnetlib.dhp.schema.dump.oaf.Result>) Class
            .forName(dumpClassName);

        SparkConf conf = new SparkConf();

        CommunityMap communityMap;

        if (!isLookUpUrl.equals("BASEURL:8280/is/services/isLookUp")) {
            queryInformationSystem = new QueryInformationSystem();
            queryInformationSystem.setIsLookUp(getIsLookUpService(isLookUpUrl));
            communityMap = queryInformationSystem.getCommunityMap();
        } else {
            communityMap = new Gson().fromJson(cm.get(), CommunityMap.class);
        }

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                execDump(spark, inputPath, outputPath, communityMap, inputClazz, dumpClazz);
            });
    }

    public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
        return ISLookupClientFactory.getLookUpService(isLookUpUrl);
    }

    public static <I extends Result, O extends eu.dnetlib.dhp.schema.dump.oaf.Result> void execDump(SparkSession spark,
        String inputPath,
        String outputPath,
        CommunityMap communityMap,
        Class<I> inputClazz,
        Class<O> dumpClazz) {

        // Set<String> communities = communityMap.keySet();
        Dataset<I> tmp = Utils.readPath(spark, inputPath, inputClazz);

        tmp
            .map(value -> execMap(value, communityMap), Encoders.bean(dumpClazz))
            .filter(Objects::nonNull)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);

    }

    private static <O extends eu.dnetlib.dhp.schema.dump.oaf.Result, I extends Result> O execMap(I value,
        CommunityMap communityMap) {
        {
            Set<String> communities = communityMap.keySet();
            Optional<List<Context>> inputContext = Optional.ofNullable(value.getContext());
            if (!inputContext.isPresent()) {
                return null;
            }
            List<String> toDumpFor = inputContext.get().stream().map(c -> {
                if (communities.contains(c.getId())) {
                    return c.getId();
                }
                if (c.getId().contains("::") && communities.contains(c.getId().substring(0, c.getId().indexOf("::")))) {
                    return c.getId().substring(0, 3);
                }
                return null;
            }).filter(Objects::nonNull).collect(Collectors.toList());
            if (toDumpFor.size() == 0) {
                return null;
            }
            return Mapper.map(value, communityMap);
        }
    }
}
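A small standalone sketch of the context-to-community matching rule that execMap applies before handing a record to Mapper (plain Java, no Spark; the community and context ids below are made-up examples):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class CommunityMatchSketch {

    // A context id matches either exactly ("egi") or by its prefix before "::" ("egi::subcommunity").
    static boolean matches(String contextId, Set<String> communities) {
        if (communities.contains(contextId)) {
            return true;
        }
        return contextId.contains("::")
            && communities.contains(contextId.substring(0, contextId.indexOf("::")));
    }

    public static void main(String[] args) {
        Set<String> communities = new HashSet<>(Arrays.asList("egi", "dh-ch")); // hypothetical community ids
        System.out.println(matches("egi", communities));                  // true
        System.out.println(matches("dh-ch::subcommunity", communities));  // true (prefix match)
        System.out.println(matches("unrelated", communities));            // false -> record is skipped
    }
}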
SparkPrepareResultProject.java
The result-to-project preparation now guards acronym, title and fundingtree with Optional, de-duplicates projects per result through a projectSet, collects them in an ArrayList (the old code added to an Arrays.asList-backed list), and parses the funder (shortname, name, jurisdiction, funding stream) out of the fundingtree XML with dom4j instead of copying the raw fundingtree strings. Reconstructed new version of the shown hunks:

@ -1,11 +1,13 @@
package eu.dnetlib.dhp.oa.graph.dump;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;

@ -14,86 +16,165 @@ import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.dump.oaf.Funder;
import eu.dnetlib.dhp.schema.dump.oaf.Projects;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Field;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;

public class SparkPrepareResultProject implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(SparkPrepareResultProject.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkPrepareResultProject.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/project_prepare_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        SparkConf conf = new SparkConf();

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                prepareResultProjectList(spark, inputPath, outputPath);
            });
    }

    private static void prepareResultProjectList(SparkSession spark, String inputPath, String outputPath) {
        Dataset<Relation> relation = Utils
            .readPath(spark, inputPath + "/relation", Relation.class)
            .filter("dataInfo.deletedbyinference = false and relClass = 'produces'");
        Dataset<Project> projects = Utils.readPath(spark, inputPath + "/project", Project.class);

        projects
            .joinWith(relation, projects.col("id").equalTo(relation.col("source")))
            .groupByKey(
                (MapFunction<Tuple2<Project, Relation>, String>) value -> value._2().getTarget(), Encoders.STRING())
            .mapGroups((MapGroupsFunction<String, Tuple2<Project, Relation>, ResultProject>) (s, it) -> {
                Set<String> projectSet = new HashSet<>();
                Tuple2<Project, Relation> first = it.next();
                ResultProject rp = new ResultProject();
                rp.setResultId(first._2().getTarget());
                Project p = first._1();
                projectSet.add(p.getId());
                Projects ps = Projects
                    .newInstance(
                        p.getId(), p.getCode().getValue(),
                        Optional.ofNullable(p.getAcronym()).map(a -> a.getValue()).orElse(null),
                        Optional.ofNullable(p.getTitle()).map(v -> v.getValue()).orElse(null),
                        Optional.ofNullable(p.getFundingtree())
                            .map(value -> value.stream().map(ft -> getFunder(ft.getValue())).collect(Collectors.toList()).get(0))
                            .orElse(null));
                List<Projects> projList = new ArrayList<>();
                projList.add(ps);
                rp.setProjectsList(projList);
                it.forEachRemaining(c -> {
                    Project op = c._1();
                    if (!projectSet.contains(op.getId())) {
                        projList
                            .add(
                                Projects
                                    .newInstance(
                                        op.getId(),
                                        op.getCode().getValue(),
                                        Optional.ofNullable(op.getAcronym()).map(a -> a.getValue()).orElse(null),
                                        Optional.ofNullable(op.getTitle()).map(v -> v.getValue()).orElse(null),
                                        Optional.ofNullable(op.getFundingtree())
                                            .map(value -> value.stream().map(ft -> getFunder(ft.getValue())).collect(Collectors.toList()).get(0))
                                            .orElse(null)));
                        projectSet.add(op.getId());
                    }
                });
                return rp;
            }, Encoders.bean(ResultProject.class))
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static Funder getFunder(String fundingtree) {
        // e.g. ["<fundingtree><funder><id>nsf_________::NSF</id><shortname>NSF</shortname><name>National Science
        // Foundation</name><jurisdiction>US</jurisdiction></funder><funding_level_1>...
        // <funding_level_0><id>nsf_________::NSF::CISE/OAD</id><description>Directorate for Computer &
        // Information Science & Engineering</description><name>Directorate for Computer & Information Science &
        // Engineering</name><parent/><class>nsf:fundingStream</class></funding_level_0>...</funding_level_1></fundingtree>"]
        Funder f = new Funder();
        final Document doc;
        try {
            doc = new SAXReader().read(new StringReader(fundingtree));
            f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText());
            f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText());
            f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText());
            for (Object o : doc.selectNodes("//funding_level_0")) {
                List node = ((Node) o).selectNodes("./name");
                f.setFundingStream(((Node) node.get(0)).getText());
            }
            return f;
        } catch (DocumentException e) {
            e.printStackTrace();
        }
        return f;
    }
}
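A minimal sketch of the same dom4j extraction run against a trimmed, flattened version of the sample fundingtree from the comment above (the same XPath expressions as getFunder; a sketch of the parsing approach, not a test from the repository):

import java.io.StringReader;

import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

public class FundingTreeSketch {
    public static void main(String[] args) throws Exception {
        String fundingtree = "<fundingtree><funder><id>nsf_________::NSF</id><shortname>NSF</shortname>"
            + "<name>National Science Foundation</name><jurisdiction>US</jurisdiction></funder>"
            + "<funding_level_0><id>nsf_________::NSF::CISE/OAD</id>"
            + "<name>Directorate for Computer &amp; Information Science &amp; Engineering</name>"
            + "<class>nsf:fundingStream</class></funding_level_0></fundingtree>";
        Document doc = new SAXReader().read(new StringReader(fundingtree));
        // shortname, name and jurisdiction come from the <funder> element ...
        System.out.println(((Node) doc.selectNodes("//funder/shortname").get(0)).getText());    // NSF
        System.out.println(((Node) doc.selectNodes("//funder/name").get(0)).getText());         // National Science Foundation
        System.out.println(((Node) doc.selectNodes("//funder/jurisdiction").get(0)).getText()); // US
        // ... while the funding stream name comes from <funding_level_0>/<name>.
        Node level0 = (Node) doc.selectNodes("//funding_level_0").get(0);
        System.out.println(((Node) level0.selectNodes("./name").get(0)).getText());
    }
}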
SparkSplitForCommunity.java
@ -1,98 +1,122 @@
The split driver gets the same community-map bootstrapping as the dump driver (ISLookUp query, or Gson-parsed communityMap parameter when the isLookUpUrl is still the BASEURL placeholder); execSplit then writes one gzip JSON folder per community under outputPath, appending every record whose context codes contain that community. Reconstructed new version (the diff view is cut off in the source inside containsCommunity):

package eu.dnetlib.dhp.oa.graph.dump;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.io.StringReader;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.Gson;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;

public class SparkSplitForCommunity implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(SparkSplitForCommunity.class);

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkSplitForCommunity.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/split_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        final String isLookUpUrl = parser.get("isLookUpUrl");
        log.info("isLookUpUrl: {}", isLookUpUrl);

        final Optional<String> cm = Optional.ofNullable(parser.get("communityMap"));

        Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

        SparkConf conf = new SparkConf();

        CommunityMap communityMap;

        if (!isLookUpUrl.equals("BASEURL:8280/is/services/isLookUp")) {
            QueryInformationSystem queryInformationSystem = new QueryInformationSystem();
            queryInformationSystem.setIsLookUp(getIsLookUpService(isLookUpUrl));
            communityMap = queryInformationSystem.getCommunityMap();
        } else {
            communityMap = new Gson().fromJson(cm.get(), CommunityMap.class);
        }

        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                execSplit(spark, inputPath, outputPath, communityMap.keySet(), inputClazz);
            });
    }

    public static ISLookUpService getIsLookUpService(String isLookUpUrl) {
        return ISLookupClientFactory.getLookUpService(isLookUpUrl);
    }

    private static <R extends Result> void execSplit(SparkSession spark, String inputPath, String outputPath,
        Set<String> communities, Class<R> inputClazz) {

        Dataset<R> result = Utils.readPath(spark, inputPath, inputClazz);

        communities
            .stream()
            .forEach(c -> printResult(c, result, outputPath));

    }

    private static <R extends Result> void printResult(String c, Dataset<R> result, String outputPath) {
        result
            .filter(r -> containsCommunity(r, c))
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Append)
            .json(outputPath + "/" + c);
    }

    private static <R extends Result> boolean containsCommunity(R r, String c) {
        if (Optional.ofNullable(r.getContext()).isPresent()) {
            return r
                .getContext()
                .stream()
                .filter(con -> con.getCode().equals(c))
                .collect(Collectors.toList())
                [diff truncated in the source at this point]
|
.size() > 0;
|
||||||
private static <R extends Result> boolean containsCommunity(R r, String c) {
|
}
|
||||||
if(Optional.ofNullable(r.getContext()).isPresent()) {
|
return false;
|
||||||
return r.getContext().stream().filter(con -> con.getCode().equals(c)).collect(Collectors.toList()).size() > 0;
|
}
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
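For reference, a minimal sketch (not part of this commit) of how the communityMap argument handled in the else-branch above might be produced and consumed. The community codes and labels are invented example values, and the JSON shape (a flat object of code-to-label pairs) is an assumption inferred from the fromJson(cm.get(), CommunityMap.class) call and the communityMap.keySet() usage above.

// Sketch only: invented community codes/labels.
// Assumes this class sits in eu.dnetlib.dhp.oa.graph.dump so CommunityMap resolves without an import.
import com.google.gson.Gson;

public class CommunityMapSketch {

    public static void main(String[] args) {
        // Assumed JSON shape: community code -> community label.
        String json = "{\"egi\": \"EGI Federation\", \"dh-ch\": \"Digital Humanities and Cultural Heritage\"}";

        CommunityMap communityMap = new Gson().fromJson(json, CommunityMap.class);

        // execSplit iterates over the key set, so each code would become one
        // output sub-directory, e.g. <outputPath>/egi and <outputPath>/dh-ch.
        communityMap.keySet().forEach(System.out::println);
    }
}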
@ -1,11 +1,11 @@

package eu.dnetlib.dhp.oa.graph.dump;

import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

import java.io.Serializable;
import java.util.Optional;

import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;

@ -16,84 +16,86 @@ import org.apache.spark.sql.SaveMode;

import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.dump.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Relation;
import scala.Tuple2;

public class SparkUpdateProjectInfo implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(SparkUpdateProjectInfo.class);
    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    public static void main(String[] args) throws Exception {
        String jsonConfiguration = IOUtils
            .toString(
                SparkUpdateProjectInfo.class
                    .getResourceAsStream(
                        "/eu/dnetlib/dhp/oa/graph/dump/project_input_parameters.json"));

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(args);

        Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("sourcePath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String resultClassName = parser.get("resultTableName");
        log.info("resultTableName: {}", resultClassName);

        final String preparedInfoPath = parser.get("preparedInfoPath");
        log.info("preparedInfoPath: {}", preparedInfoPath);

        Class<? extends Result> inputClazz = (Class<? extends Result>) Class.forName(resultClassName);

        SparkConf conf = new SparkConf();
        runWithSparkSession(
            conf,
            isSparkSessionManaged,
            spark -> {
                Utils.removeOutputDir(spark, outputPath);
                extend(spark, inputPath, outputPath, preparedInfoPath, inputClazz);
            });
    }

    private static <R extends Result> void extend(
        SparkSession spark,
        String inputPath,
        String outputPath,
        String preparedInfoPath,
        Class<R> inputClazz) {

        Dataset<R> result = Utils.readPath(spark, inputPath, inputClazz);
        Dataset<ResultProject> resultProject = Utils.readPath(spark, preparedInfoPath, ResultProject.class);
        result
            .joinWith(
                resultProject, result.col("id").equalTo(resultProject.col("resultId")),
                "left")
            .map(value -> {
                R r = value._1();
                Optional.ofNullable(value._2()).ifPresent(rp -> {
                    r.setProjects(rp.getProjectsList());
                });
                return r;
            }, Encoders.bean(inputClazz))
            .write()
            .option("compression", "gzip")
            .mode(SaveMode.Overwrite)
            .json(outputPath);
    }
}
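A minimal, self-contained sketch of the same left-join enrichment pattern used by extend above, with hypothetical Foo/FooInfo beans standing in for the result and ResultProject types. It is an illustration under those assumptions, not code from this commit; the explicit MapFunction cast is one way to keep the Java overload resolution unambiguous.

// Sketch only: hypothetical bean types and local execution.
import java.io.Serializable;
import java.util.Arrays;
import java.util.Optional;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;

public class LeftJoinEnrichSketch {

    public static class Foo implements Serializable {
        private String id;
        private String label;
        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
        public String getLabel() { return label; }
        public void setLabel(String label) { this.label = label; }
    }

    public static class FooInfo implements Serializable {
        private String fooId;
        private String extra;
        public String getFooId() { return fooId; }
        public void setFooId(String fooId) { this.fooId = fooId; }
        public String getExtra() { return extra; }
        public void setExtra(String extra) { this.extra = extra; }
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().master("local[*]").appName("left-join-sketch").getOrCreate();

        Foo a = new Foo(); a.setId("1"); a.setLabel("first");
        Foo b = new Foo(); b.setId("2"); b.setLabel("second");
        FooInfo ia = new FooInfo(); ia.setFooId("1"); ia.setExtra("enriched");

        Dataset<Foo> foos = spark.createDataset(Arrays.asList(a, b), Encoders.bean(Foo.class));
        Dataset<FooInfo> infos = spark.createDataset(Arrays.asList(ia), Encoders.bean(FooInfo.class));

        // "left" keeps every Foo; the right side of the tuple is null when no info matches,
        // which is why the map step guards the enrichment with Optional.ofNullable.
        Dataset<Foo> enriched = foos
            .joinWith(infos, foos.col("id").equalTo(infos.col("fooId")), "left")
            .map((MapFunction<Tuple2<Foo, FooInfo>, Foo>) t -> {
                Foo f = t._1();
                Optional.ofNullable(t._2()).ifPresent(info -> f.setLabel(f.getLabel() + "/" + info.getExtra()));
                return f;
            }, Encoders.bean(Foo.class));

        enriched.show(false);
        spark.stop();
    }
}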
@ -1,23 +1,27 @@

package eu.dnetlib.dhp.oa.graph.dump;

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.common.HdfsSupport;

public class Utils {

    public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    public static void removeOutputDir(SparkSession spark, String path) {
        HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
    }

    public static <R> Dataset<R> readPath(
        SparkSession spark, String inputPath, Class<R> clazz) {
        return spark
            .read()
            .textFile(inputPath)
            .map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
    }
}
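A possible usage sketch for Utils.readPath (not part of the commit). It assumes the input is newline-delimited JSON, one serialized record per line, which is what the dump jobs above write via .json(outputPath); the path and the local master are placeholders.

// Sketch only: placeholder path, local execution.
// Assumes this class sits in eu.dnetlib.dhp.oa.graph.dump so Utils resolves without an import.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

import eu.dnetlib.dhp.schema.dump.oaf.Result;

public class ReadPathSketch {

    public static void main(String[] args) {
        SparkSession spark = SparkSession
            .builder()
            .master("local[*]")
            .appName("readPath-sketch")
            .getOrCreate();

        // readPath parses each text line with Jackson, so gzipped JSON-lines
        // output produced by the jobs above can be read back as typed beans.
        Dataset<Result> results = Utils.readPath(spark, "/tmp/dump/publication", Result.class);

        results.printSchema();
        spark.stop();
    }
}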