Merge remote-tracking branch 'origin/master' into doiboost

2020-05-28 09:58:32 +02:00 · 2020-05-28 09:58:32 +02:00 · 02f90eeb07
parent 7d29b61c62 aac1515b58
commit 02f90eeb07
27 changed files with 2047 additions and 185 deletions
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/PacePerson.java
@ -1,5 +1,5 @@

-package eu.dnetlib.dhp.oa.graph.raw.common;
+package eu.dnetlib.dhp.common;

 import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
--- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java
+++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java
@ -61,12 +61,6 @@ public class BlackListTest {
 		spark.stop();
 	}

-	/*
-	 * String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); final String outputPath =
-	 * parser.get("outputPath"); log.info("outputPath {}: ", outputPath); final String blacklistPath =
-	 * parser.get("hdfsPath"); log.info("blacklistPath {}: ", blacklistPath); final String mergesPath =
-	 * parser.get("mergesPath"); log.info("mergesPath {}: ", mergesPath);
-	 */
 	@Test
 	public void noRemoveTest() throws Exception {
 		SparkRemoveBlacklistedRelationJob
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag;
 import static eu.dnetlib.dhp.PropagationConstant.removeOutputDir;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

+import java.util.ArrayList;
 import java.util.Optional;

 import org.apache.commons.io.IOUtils;
@ -100,6 +101,7 @@ public class SparkBulkTagJob {

 		ResultTagger resultTagger = new ResultTagger();
 		readPath(spark, inputPath, resultClazz)
+			.map(patchResult(), Encoders.bean(resultClazz))
 			.map(
 				(MapFunction<R, R>) value -> resultTagger
 					.enrichContextCriteria(
@ -119,4 +121,17 @@ public class SparkBulkTagJob {
 			.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
 	}

+	// TODO remove this hack as soon as the values patched by this method are provided as non-null
+	private static <R extends Result> MapFunction<R, R> patchResult() {
+		return (MapFunction<R, R>) r -> {
+			if (r.getDataInfo().getDeletedbyinference() == null) {
+				r.getDataInfo().setDeletedbyinference(false);
+			}
+			if (r.getContext() == null) {
+				r.setContext(new ArrayList<>());
+			}
+			return r;
+		};
+	}
+
 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java
@ -131,7 +131,7 @@ public class CommunityConfiguration implements Serializable {
 				p -> {
 					if (p.getSnd() == null)
 						return p.getFst();
-					if (((SelectionConstraints) p.getSnd()).verifyCriteria(param))
+					if (p.getSnd().verifyCriteria(param))
 						return p.getFst();
 					else
 						return null;
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java
@ -34,7 +34,7 @@ public class VerbResolver implements Serializable {
 				.collect(
 					Collectors
 						.toMap(
-							value -> (String) ((ClassInfo) value)
+							value -> (String) value
 								.getAnnotationInfo()
 								.get(0)
 								.getParameterValues()
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java
@ -77,9 +77,15 @@ public class PrepareDatasourceCountryAssociation {
 		List<String> allowedtypes,
 		String inputPath,
 		String outputPath) {
-		String whitelisted = "";
-		for (String i : whitelist) {
-			whitelisted += " OR id = '" + i + "'";
+		String whitelisted = " d.id = '" + whitelist.get(0) + "'";
+		for (int i = 1; i < whitelist.size(); i++) {
+			whitelisted += " OR d.id = '" + whitelist.get(i) + "'";
+		}
+
+		String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'";
+
+		for (int i = 1; i < allowedtypes.size(); i++) {
+			allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'";
 		}

 		Dataset<Datasource> datasource = readPath(spark, inputPath + "/datasource", Datasource.class);
@ -90,26 +96,39 @@ public class PrepareDatasourceCountryAssociation {
 		relation.createOrReplaceTempView("relation");
 		organization.createOrReplaceTempView("organization");

-		String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
-			+ "FROM ( SELECT id "
-			+ "       FROM datasource "
-			+ "       WHERE (datainfo.deletedbyinference = false "
-			+ whitelisted
-			+ ") "
-			+ getConstraintList("datasourcetype.classid = '", allowedtypes)
-			+ ") d "
-			+ "JOIN ( SELECT source, target "
-			+ "       FROM relation "
-			+ "       WHERE relclass = '"
-			+ ModelConstants.IS_PROVIDED_BY
-			+ "' "
-			+ "       AND datainfo.deletedbyinference = false ) rel "
-			+ "ON d.id = rel.source "
-			+ "JOIN (SELECT id, country "
-			+ "      FROM organization "
-			+ "      WHERE datainfo.deletedbyinference = false "
-			+ "      AND length(country.classid) > 0) o "
-			+ "ON o.id = rel.target";
+//		String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
+//			+ "FROM ( SELECT id "
+//			+ "       FROM datasource "
+//			+ "       WHERE (datainfo.deletedbyinference = false "
+//			+ whitelisted
+//			+ ") "
+//			+ getConstraintList("datasourcetype.classid = '", allowedtypes)
+//			+ ") d "
+//			+ "JOIN ( SELECT source, target "
+//			+ "       FROM relation "
+//			+ "       WHERE relclass = '"
+//			+ ModelConstants.IS_PROVIDED_BY
+//			+ "' "
+//			+ "       AND datainfo.deletedbyinference = false ) rel "
+//			+ "ON d.id = rel.source "
+//			+ "JOIN (SELECT id, country "
+//			+ "      FROM organization "
+//			+ "      WHERE datainfo.deletedbyinference = false "
+//			+ "      AND length(country.classid) > 0) o "
+//			+ "ON o.id = rel.target";
+
+		String query = "SELECT source dataSourceId, " +
+			"named_struct('classid', country.classid, 'classname', country.classname) country " +
+			"FROM datasource d " +
+			"JOIN relation rel " +
+			"ON d.id = rel.source " +
+			"JOIN organization o " +
+			"ON o.id = rel.target " +
+			"WHERE rel.datainfo.deletedbyinference = false  " +
+			"and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" +
+			"and o.datainfo.deletedbyinference = false  " +
+			"and length(o.country.classid) > 0 " +
+			"and (" + allowed + " or " + whitelisted + ")";

 		spark
 			.sql(query)
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java
@ -4,7 +4,12 @@ package eu.dnetlib.dhp.countrypropagation;
 import static eu.dnetlib.dhp.PropagationConstant.*;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;

+import java.util.ArrayList;
+import java.util.Set;
+import java.util.stream.Collectors;
+
 import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.*;
 import org.apache.spark.sql.Dataset;
@ -13,6 +18,7 @@ import org.slf4j.LoggerFactory;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.oaf.*;
+import scala.Tuple2;

 public class PrepareResultCountrySet {
 	private static final Logger log = LoggerFactory.getLogger(PrepareResultCountrySet.class);
@ -60,6 +66,7 @@ public class PrepareResultCountrySet {
 			conf,
 			isSparkSessionManaged,
 			spark -> {
+				removeOutputDir(spark, outputPath);
 				getPotentialResultToUpdate(
 					spark,
 					inputPath,
@ -89,10 +96,33 @@ public class PrepareResultCountrySet {
 		spark
 			.sql(RESULT_COUNTRYSET_QUERY)
 			.as(Encoders.bean(ResultCountrySet.class))
-			.write()
-			.option("compression", "gzip")
-			.mode(SaveMode.Append)
-			.json(outputPath);
+			.toJavaRDD()
+			.mapToPair(value -> new Tuple2<>(value.getResultId(), value))
+			.reduceByKey((a, b) -> {
+				ArrayList<CountrySbs> countryList = a.getCountrySet();
+				Set<String> countryCodes = countryList
+					.stream()
+					.map(country -> country.getClassid())
+					.collect(Collectors.toSet());
+				b
+					.getCountrySet()
+					.stream()
+					.forEach(c -> {
+						if (!countryCodes.contains(c.getClassid())) {
+							countryList.add(c);
+							countryCodes.add(c.getClassid());
+						}
+
+					});
+				a.setCountrySet(countryList);
+				return a;
+			})
+			.map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2()))
+			.saveAsTextFile(outputPath, GzipCodec.class);
+//			.write()
+//			.option("compression", "gzip")
+//			.mode(SaveMode.Append)
+//			.json(outputPath);
 	}

 }
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java
@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.Lists;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.PacePerson;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
@ -121,30 +122,39 @@ public class SparkOrcidToResultFromSemRelJob {
 	}

 	private static void enrichAuthor(Author a, List<AutoritativeAuthor> au) {
+		PacePerson pp = new PacePerson(a.getFullname(), false);
 		for (AutoritativeAuthor aa : au) {
-			if (enrichAuthor(aa, a)) {
+			if (enrichAuthor(aa, a, pp.getNormalisedFirstName(), pp.getNormalisedSurname())) {
 				return;
 			}
 		}
 	}

-	private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
+	private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author,
+		String author_name,
+		String author_surname) {
 		boolean toaddpid = false;

 		if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
 			if (StringUtils.isNotEmpty(author.getSurname())) {
+				author_surname = author.getSurname();
+			}
+			if (StringUtils.isNotEmpty(author_surname)) {
 				if (autoritative_author
 					.getSurname()
 					.trim()
-					.equalsIgnoreCase(author.getSurname().trim())) {
+					.equalsIgnoreCase(author_surname.trim())) {

 					// have the same surname. Check the name
 					if (StringUtils.isNotEmpty(autoritative_author.getName())) {
 						if (StringUtils.isNotEmpty(author.getName())) {
+							author_name = author.getName();
+						}
+						if (StringUtils.isNotEmpty(author_name)) {
 							if (autoritative_author
 								.getName()
 								.trim()
-								.equalsIgnoreCase(author.getName().trim())) {
+								.equalsIgnoreCase(author_name.trim())) {
 								toaddpid = true;
 							}
 							// they could be differently written (i.e. only the initials of the name
@ -154,7 +164,7 @@ public class SparkOrcidToResultFromSemRelJob {
 									.getName()
 									.trim()
 									.substring(0, 0)
-									.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
+									.equalsIgnoreCase(author_name.trim().substring(0, 0))) {
 									toaddpid = true;
 								}
 							}
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java
@ -105,11 +105,7 @@ public class SparkResultToProjectThroughSemRelJob {
 					.stream()
 					.forEach(
 						(p -> {
-							if (potential_update
-								.getProjectSet()
-								.contains(p)) {
 							potential_update.getProjectSet().remove(p);
-							}
 						}));
 			}
 			String resId = potential_update.getResultId();
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
 import java.util.*;

 import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.*;
@ -19,6 +20,7 @@ import com.google.gson.Gson;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Relation;
+import scala.Tuple2;

 public class PrepareResultCommunitySet {

@ -93,10 +95,24 @@ public class PrepareResultCommunitySet {
 		result_organizationset
 			.map(mapResultCommunityFn(organizationMap), Encoders.bean(ResultCommunityList.class))
 			.filter(Objects::nonNull)
-			.write()
-			.mode(SaveMode.Overwrite)
-			.option("compression", "gzip")
-			.json(outputPath);
+			.toJavaRDD()
+			.mapToPair(value -> new Tuple2<>(value.getResultId(), value))
+			.reduceByKey((a, b) -> {
+				ArrayList<String> cl = a.getCommunityList();
+				b.getCommunityList().stream().forEach(s -> {
+					if (!cl.contains(s)) {
+						cl.add(s);
+					}
+				});
+				a.setCommunityList(cl);
+				return a;
+			})
+			.map(value -> OBJECT_MAPPER.writeValueAsString(value._2()))
+			.saveAsTextFile(outputPath, GzipCodec.class);
+//                      .write()
+//                      .mode(SaveMode.Overwrite)
+//                      .option("compression", "gzip")
+//                      .json(outputPath);
 	}

 	private static MapFunction<ResultOrganizations, ResultCommunityList> mapResultCommunityFn(
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java
@ -136,9 +136,7 @@ public class SparkResultToOrganizationFromIstRepoJob {
 					.stream()
 					.forEach(
 						rId -> {
-							if (organization_list.contains(rId)) {
 							organization_list.remove(rId);
-							}
 						});
 			}
 			String resultId = potential_update.getResultId();
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java
@ -10,10 +10,27 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.listFields;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.oaiIProvenance;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.qualifier;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.NOT_AVAILABLE;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.REPOSITORY_PROVENANCE_ACTIONS;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.UNKNOWN;

-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;

 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
@ -21,6 +38,7 @@ import org.dom4j.DocumentFactory;
 import org.dom4j.DocumentHelper;
 import org.dom4j.Node;

+import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.common.LicenseComparator;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Context;
@ -43,7 +61,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

 public abstract class AbstractMdRecordToOafMapper {

-	protected final Map<String, String> code2name;
+	protected final VocabularyGroup vocs;

 	protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
 	protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
@ -67,8 +85,8 @@ public abstract class AbstractMdRecordToOafMapper {
 	protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
 		"main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");

-	protected AbstractMdRecordToOafMapper(final Map<String, String> code2name) {
-		this.code2name = code2name;
+	protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs) {
+		this.vocs = vocs;
 	}

 	public List<Oaf> processMdRecord(final String xml) {
@ -247,10 +265,7 @@ public abstract class AbstractMdRecordToOafMapper {
 		r.setId(createOpenaireId(50, doc.valueOf("//dri:objIdentifier"), false));
 		r.setOriginalId(Arrays.asList(doc.valueOf("//dri:objIdentifier")));
 		r.setCollectedfrom(Arrays.asList(collectedFrom));
-		r
-			.setPid(
-				prepareListStructProps(
-					doc, "//oaf:identifier", "@identifierType", "dnet:pid_types", "dnet:pid_types", info));
+		r.setPid(prepareResultPids(doc, info));
 		r.setDateofcollection(doc.valueOf("//dr:dateOfCollection"));
 		r.setDateoftransformation(doc.valueOf("//dr:dateOfTransformation"));
 		r.setExtraInfo(new ArrayList<>()); // NOT PRESENT IN MDSTORES
@ -278,6 +293,8 @@ public abstract class AbstractMdRecordToOafMapper {
 		r.setBestaccessright(getBestAccessRights(instances));
 	}

+	protected abstract List<StructuredProperty> prepareResultPids(Document doc, DataInfo info);
+
 	private List<Context> prepareContexts(final Document doc, final DataInfo info) {
 		final List<Context> list = new ArrayList<>();
 		for (final Object o : doc.selectNodes("//oaf:concept")) {
@ -358,7 +375,7 @@ public abstract class AbstractMdRecordToOafMapper {

 	protected abstract Field<String> prepareDatasetStorageDate(Document doc, DataInfo info);

-	protected static Qualifier getBestAccessRights(List<Instance> instanceList) {
+	protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
 		if (instanceList != null) {
 			final Optional<Qualifier> min = instanceList
 				.stream()
@ -405,14 +422,12 @@ public abstract class AbstractMdRecordToOafMapper {
 		return null;
 	}

-	protected Qualifier prepareQualifier(
-		final Node node,
-		final String xpath,
-		final String schemeId,
-		final String schemeName) {
-		final String classId = node.valueOf(xpath);
-		final String className = code2name.get(classId);
-		return qualifier(classId, className, schemeId, schemeName);
+	protected Qualifier prepareQualifier(final Node node, final String xpath, final String schemeId) {
+		return prepareQualifier(node.valueOf(xpath).trim(), schemeId);
+	}
+
+	protected Qualifier prepareQualifier(final String classId, final String schemeId) {
+		return vocs.getTermAsQualifier(schemeId, classId);
 	}

 	protected List<StructuredProperty> prepareListStructProps(
@ -420,14 +435,31 @@ public abstract class AbstractMdRecordToOafMapper {
 		final String xpath,
 		final String xpathClassId,
 		final String schemeId,
-		final String schemeName,
 		final DataInfo info) {
 		final List<StructuredProperty> res = new ArrayList<>();
+
 		for (final Object o : node.selectNodes(xpath)) {
 			final Node n = (Node) o;
-			final String classId = n.valueOf(xpathClassId);
-			final String className = code2name.get(classId);
-			res.add(structuredProperty(n.getText(), classId, className, schemeId, schemeName, info));
+			final String classId = n.valueOf(xpathClassId).trim();
+			res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info));
+		}
+		return res;
+	}
+
+	protected List<StructuredProperty> prepareListStructPropsWithValidQualifier(
+		final Node node,
+		final String xpath,
+		final String xpathClassId,
+		final String schemeId,
+		final DataInfo info) {
+		final List<StructuredProperty> res = new ArrayList<>();
+
+		for (final Object o : node.selectNodes(xpath)) {
+			final Node n = (Node) o;
+			final String classId = n.valueOf(xpathClassId).trim();
+			if (vocs.termExists(schemeId, classId)) {
+				res.add(structuredProperty(n.getText(), vocs.getTermAsQualifier(schemeId, classId), info));
+			}
 		}
 		return res;
 	}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java
@ -4,8 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw;
 import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;

 import java.io.IOException;
-import java.sql.SQLException;
-import java.util.*;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
 import java.util.stream.Collectors;

 import org.apache.commons.io.IOUtils;
@ -24,10 +26,22 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;

 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.DbClient;
 import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.common.ModelSupport;
-import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.Dataset;
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Organization;
+import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
+import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.Software;
+import eu.dnetlib.dhp.utils.ISLookupClientFactory;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
 import scala.Tuple2;

 public class GenerateEntitiesApplication {
@ -40,13 +54,12 @@ public class GenerateEntitiesApplication {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
-					MigrateMongoMdstoresApplication.class
-						.getResourceAsStream(
-							"/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json")));
+					GenerateEntitiesApplication.class
+						.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json")));

 		parser.parseArgument(args);

-		Boolean isSparkSessionManaged = Optional
+		final Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
@ -55,29 +68,28 @@ public class GenerateEntitiesApplication {
 		final String sourcePaths = parser.get("sourcePaths");
 		final String targetPath = parser.get("targetPath");

-		final String dbUrl = parser.get("postgresUrl");
-		final String dbUser = parser.get("postgresUser");
-		final String dbPassword = parser.get("postgresPassword");
+		// final String dbUrl = parser.get("postgresUrl");
+		// final String dbUser = parser.get("postgresUser");
+		// final String dbPassword = parser.get("postgresPassword");

-		final Map<String, String> code2name = loadClassNames(dbUrl, dbUser, dbPassword);
+		final String isLookupUrl = parser.get("isLookupUrl");

-		SparkConf conf = new SparkConf();
-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
+		final VocabularyGroup vocs = loadVocsFromIS(isLookupUrl); // MAP: vocId -> voc
+
+		final SparkConf conf = new SparkConf();
+		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
 			removeOutputDir(spark, targetPath);
-				generateEntities(spark, code2name, sourcePaths, targetPath);
+			generateEntities(spark, vocs, sourcePaths, targetPath);
 		});
 	}

 	private static void generateEntities(
 		final SparkSession spark,
-		final Map<String, String> code2name,
+		final VocabularyGroup vocs,
 		final String sourcePaths,
 		final String targetPath) {

-		JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
 		final List<String> existingSourcePaths = Arrays
 			.stream(sourcePaths.split(","))
 			.filter(p -> exists(sc, p))
@ -94,7 +106,7 @@ public class GenerateEntitiesApplication {
 					sc
 						.sequenceFile(sp, Text.class, Text.class)
 						.map(k -> new Tuple2<>(k._1().toString(), k._2().toString()))
-						.map(k -> convertToListOaf(k._1(), k._2(), code2name))
+						.map(k -> convertToListOaf(k._1(), k._2(), vocs))
 						.filter(Objects::nonNull)
 						.flatMap(list -> list.iterator()));
 		}
@ -110,7 +122,7 @@ public class GenerateEntitiesApplication {
 			.saveAsTextFile(targetPath, GzipCodec.class);
 	}

-	private static Oaf merge(Oaf o1, Oaf o2) {
+	private static Oaf merge(final Oaf o1, final Oaf o2) {
 		if (ModelSupport.isSubClass(o1, OafEntity.class)) {
 			((OafEntity) o1).mergeFrom((OafEntity) o2);
 		} else if (ModelSupport.isSubClass(o1, Relation.class)) {
@ -122,14 +134,16 @@ public class GenerateEntitiesApplication {
 	}

 	private static List<Oaf> convertToListOaf(
-		final String id, final String s, final Map<String, String> code2name) {
+		final String id,
+		final String s,
+		final VocabularyGroup vocs) {
 		final String type = StringUtils.substringAfter(id, ":");

 		switch (type.toLowerCase()) {
 			case "native_oaf":
-				return new OafToOafMapper(code2name).processMdRecord(s);
+				return new OafToOafMapper(vocs).processMdRecord(s);
 			case "native_odf":
-				return new OdfToOafMapper(code2name).processMdRecord(s);
+				return new OdfToOafMapper(vocs).processMdRecord(s);
 			case "datasource":
 				return Arrays.asList(convertFromJson(s, Datasource.class));
 			case "organization":
@ -151,29 +165,33 @@ public class GenerateEntitiesApplication {
 		}
 	}

-	private static Map<String, String> loadClassNames(
-		final String dbUrl, final String dbUser, final String dbPassword) throws IOException {
+	private static VocabularyGroup loadVocsFromIS(final String isLookupUrl) throws IOException, ISLookUpException {
+		final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookupUrl);

-		log.info("Loading vocabulary terms from db...");
+		final String xquery = IOUtils
+			.toString(
+				GenerateEntitiesApplication.class
+					.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery"));

-		final Map<String, String> map = new HashMap<>();
+		final VocabularyGroup vocs = new VocabularyGroup();

-		try (DbClient dbClient = new DbClient(dbUrl, dbUser, dbPassword)) {
-			dbClient
-				.processResults(
-					"select code, name from class",
-					rs -> {
-						try {
-							map.put(rs.getString("code"), rs.getString("name"));
-						} catch (final SQLException e) {
-							e.printStackTrace();
-						}
-					});
+		for (final String s : isLookUpService.quickSearchProfile(xquery)) {
+			final String[] arr = s.split("@=@");
+			if (arr.length == 4) {
+				final String vocId = arr[0].trim();
+				final String vocName = arr[1].trim();
+				final String termId = arr[2].trim();
+				final String termName = arr[3].trim();
+
+				if (!vocs.vocabularyExists(vocId)) {
+					vocs.addVocabulary(vocId, vocName);
 				}

-		log.info("Found " + map.size() + " terms.");
+				vocs.addTerm(vocId, termId, termName);
+			}
+		}

-		return map;
+		return vocs;
 	}

 	private static Oaf convertFromJson(final String s, final Class<? extends Oaf> clazz) {
@ -196,7 +214,7 @@ public class GenerateEntitiesApplication {
 		}
 	}

-	private static void removeOutputDir(SparkSession spark, String path) {
+	private static void removeOutputDir(final SparkSession spark, final String path) {
 		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
 	}
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java
@ -6,6 +6,7 @@ import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.field;
 import static eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils.structuredProperty;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET;
@ -13,7 +14,6 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT;

 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 import java.util.stream.Collectors;

 import org.apache.commons.lang3.StringUtils;
@ -23,7 +23,8 @@ import org.dom4j.Node;

 import com.google.common.collect.Lists;

-import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
+import eu.dnetlib.dhp.common.PacePerson;
+import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Field;
@ -36,8 +37,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

 public class OafToOafMapper extends AbstractMdRecordToOafMapper {

-	public OafToOafMapper(final Map<String, String> code2name) {
-		super(code2name);
+	public OafToOafMapper(final VocabularyGroup vocs) {
+		super(vocs);
 	}

 	@Override
@ -83,7 +84,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {

 	@Override
 	protected Qualifier prepareLanguages(final Document doc) {
-		return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES, DNET_LANGUAGES);
+		return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES);
 	}

 	@Override
@ -130,14 +131,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {

 		final Instance instance = new Instance();
 		instance
-			.setInstancetype(
-				prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
+			.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE));
 		instance.setCollectedfrom(collectedfrom);
 		instance.setHostedby(hostedby);
 		instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
 		instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
 		instance
-			.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
+			.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
 		instance.setLicense(field(doc.valueOf("//oaf:license"), info));
 		instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
 		instance
@ -297,4 +297,10 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper {
 	protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
 		return null; // NOT PRESENT IN OAF
 	}
+
+	/**
+	 * Builds the list of result PIDs for an OAF record by delegating to
+	 * {@code prepareListStructPropsWithValidQualifier}, selecting the {@code oaf:identifier} elements and reading
+	 * the PID type from their {@code identifierType} attribute, within the {@code DNET_PID_TYPES} scheme.
+	 *
+	 * @param doc the parsed metadata record
+	 * @param info the data info passed on to the helper
+	 * @return the structured properties returned by the helper
+	 */
+	@Override
+	protected List<StructuredProperty> prepareResultPids(final Document doc, final DataInfo info) {
+		final String pidXpath = "//oaf:identifier";
+		final String pidTypeAttribute = "@identifierType";
+		final List<StructuredProperty> pids = prepareListStructPropsWithValidQualifier(
+			doc, pidXpath, pidTypeAttribute, DNET_PID_TYPES, info);
+		return pids;
+	}
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java
@ -8,6 +8,7 @@ import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_ACCESS_MODES;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_DATE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_DATA_CITE_RESOURCE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_LANGUAGES;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PID_TYPES;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PUBLICATION_RESOURCE;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTS;
 import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PART_OF;
@ -21,14 +22,14 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;

 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.Node;

-import eu.dnetlib.dhp.oa.graph.raw.common.PacePerson;
+import eu.dnetlib.dhp.common.PacePerson;
+import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.DataInfo;
 import eu.dnetlib.dhp.schema.oaf.Field;
@ -43,8 +44,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {

 	public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/";

-	public OdfToOafMapper(final Map<String, String> code2name) {
-		super(code2name);
+	public OdfToOafMapper(final VocabularyGroup vocs) {
+		super(vocs);
 	}

 	@Override
@ -120,14 +121,13 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {

 		final Instance instance = new Instance();
 		instance
-			.setInstancetype(
-				prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE));
+			.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE));
 		instance.setCollectedfrom(collectedfrom);
 		instance.setHostedby(hostedby);
 		instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
 		instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
 		instance
-			.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES, DNET_ACCESS_MODES));
+			.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
 		instance.setLicense(field(doc.valueOf("//oaf:license"), info));
 		instance.setRefereed(field(doc.valueOf("//oaf:refereed"), info));
 		instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
@ -211,7 +211,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {

 	@Override
 	protected Qualifier prepareLanguages(final Document doc) {
-		return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES, DNET_LANGUAGES);
+		return prepareQualifier(doc, "//datacite:language", DNET_LANGUAGES);
 	}

 	@Override
@ -239,7 +239,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {

 	@Override
 	protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
-		return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages", "dnet:programming_languages");
+		return prepareQualifier(doc, "//datacite:format", "dnet:programming_languages");
 	}

 	@Override
@ -366,7 +366,26 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
 	@Override
 	protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
 		return prepareQualifier(
-			doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE,
-			DNET_DATA_CITE_RESOURCE);
+			doc, "//*[local-name() = 'resource']//*[local-name() = 'resourceType']", DNET_DATA_CITE_RESOURCE);
 	}
+
+	/**
+	 * Builds the list of result PIDs for an ODF record from three sources, appended in this order:
+	 * {@code oaf:identifier}, {@code datacite:identifier} and {@code datacite:alternateIdentifier}. For the two
+	 * datacite elements the XPath predicate keeps only entries whose type attribute differs from 'URL'.
+	 *
+	 * @param doc the parsed metadata record
+	 * @param info the data info passed on to the helper
+	 * @return a new list holding the properties found in the three sources
+	 */
+	@Override
+	protected List<StructuredProperty> prepareResultPids(final Document doc, final DataInfo info) {
+		final List<StructuredProperty> oafPids = prepareListStructPropsWithValidQualifier(
+			doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info);
+
+		final List<StructuredProperty> datacitePids = prepareListStructPropsWithValidQualifier(
+			doc, "//datacite:identifier[@identifierType != 'URL']", "@identifierType", DNET_PID_TYPES, info);
+
+		final List<StructuredProperty> alternatePids = prepareListStructPropsWithValidQualifier(
+			doc, "//datacite:alternateIdentifier[@alternateIdentifierType != 'URL']",
+			"@alternateIdentifierType", DNET_PID_TYPES, info);
+
+		final List<StructuredProperty> pids = new ArrayList<>();
+		pids.addAll(oafPids);
+		pids.addAll(datacitePids);
+		pids.addAll(alternatePids);
+		return pids;
+	}
+
 }
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/Vocabulary.java
@ -0,0 +1,42 @@
+
+package eu.dnetlib.dhp.oa.graph.raw.common;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A vocabulary: an id, a name and a set of terms that are looked up by their case-insensitive id.
+ */
+public class Vocabulary {
+
+	private final String id;
+	private final String name;
+
+	/** Terms keyed by their normalized (lower-cased) id. */
+	private final Map<String, VocabularyTerm> terms = new HashMap<>();
+
+	/**
+	 * Creates an empty vocabulary.
+	 *
+	 * @param id the vocabulary id
+	 * @param name the vocabulary name
+	 */
+	public Vocabulary(final String id, final String name) {
+		this.id = id;
+		this.name = name;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * @return the internal (live, mutable) map of terms, keyed by normalized term id
+	 */
+	protected Map<String, VocabularyTerm> getTerms() {
+		return terms;
+	}
+
+	/**
+	 * Looks up a term by id, ignoring case.
+	 *
+	 * @param id the term id; may be null
+	 * @return the matching term, or null when the id is null or not registered
+	 */
+	public VocabularyTerm getTerm(final String id) {
+		return id == null ? null : terms.get(normalize(id));
+	}
+
+	/**
+	 * Registers a term, replacing any term already stored under the same case-insensitive id.
+	 *
+	 * @param id the term id; must not be null
+	 * @param name the term name
+	 * @throws NullPointerException if id is null
+	 */
+	protected void addTerm(final String id, final String name) {
+		terms.put(normalize(java.util.Objects.requireNonNull(id, "term id")), new VocabularyTerm(id, name));
+	}
+
+	/**
+	 * @param id the term id; may be null
+	 * @return true when a term is registered under the given id (case-insensitive), false otherwise
+	 */
+	protected boolean termExists(final String id) {
+		return id != null && terms.containsKey(normalize(id));
+	}
+
+	/** Folds an id to the map key form; Locale.ROOT keeps the result independent of the JVM default locale. */
+	private static String normalize(final String id) {
+		return id.toLowerCase(java.util.Locale.ROOT);
+	}
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyGroup.java
@ -0,0 +1,49 @@
+
+package eu.dnetlib.dhp.oa.graph.raw.common;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
+
+/**
+ * A set of vocabularies indexed by their case-insensitive id, with helpers to resolve terms and to expose them as
+ * {@link Qualifier}s.
+ */
+public class VocabularyGroup {
+
+	/** Vocabularies keyed by their normalized (lower-cased) id. */
+	private final Map<String, Vocabulary> vocs = new HashMap<>();
+
+	/**
+	 * Registers a new, empty vocabulary, replacing any vocabulary already stored under the same id.
+	 *
+	 * @param id the vocabulary id; must not be null
+	 * @param name the vocabulary name
+	 * @throws NullPointerException if id is null
+	 */
+	public void addVocabulary(final String id, final String name) {
+		vocs.put(key(id), new Vocabulary(id, name));
+	}
+
+	/**
+	 * Adds a term to an already registered vocabulary. The call is ignored when the vocabulary is unknown.
+	 *
+	 * @param vocId the id of the target vocabulary
+	 * @param id the term id
+	 * @param name the term name
+	 */
+	public void addTerm(final String vocId, final String id, final String name) {
+		final Vocabulary voc = findVocabulary(vocId);
+		if (voc != null) {
+			voc.addTerm(id, name);
+		}
+	}
+
+	/**
+	 * Resolves a term of a vocabulary.
+	 *
+	 * @param vocId the vocabulary id
+	 * @param id the term id
+	 * @return the registered term, or a new term using the given id as both id and name when either the vocabulary
+	 *         or the term is unknown
+	 */
+	public VocabularyTerm getTerm(final String vocId, final String id) {
+		final VocabularyTerm term = findTerm(findVocabulary(vocId), id);
+		return term != null ? term : new VocabularyTerm(id, id);
+	}
+
+	/**
+	 * Resolves a term of a vocabulary as a {@link Qualifier}.
+	 *
+	 * @param vocId the vocabulary id
+	 * @param id the term id
+	 * @return a qualifier built from the registered term and vocabulary, or a qualifier built from the raw
+	 *         {@code id} and {@code vocId} when either the vocabulary or the term is unknown
+	 */
+	public Qualifier getTermAsQualifier(final String vocId, final String id) {
+		final Vocabulary voc = findVocabulary(vocId);
+		final VocabularyTerm term = findTerm(voc, id);
+		if (term != null) {
+			return OafMapperUtils.qualifier(term.getId(), term.getName(), voc.getId(), voc.getName());
+		}
+		return OafMapperUtils.qualifier(id, id, vocId, vocId);
+	}
+
+	/**
+	 * @return true when the vocabulary is registered and contains the term (both matched ignoring case)
+	 */
+	public boolean termExists(final String vocId, final String id) {
+		return findTerm(findVocabulary(vocId), id) != null;
+	}
+
+	/**
+	 * @return true when a vocabulary is registered under the given id (matched ignoring case)
+	 */
+	public boolean vocabularyExists(final String vocId) {
+		return vocId != null && vocs.containsKey(key(vocId));
+	}
+
+	/** Returns the vocabulary registered under the given id, or null when the id is null or unknown. */
+	private Vocabulary findVocabulary(final String vocId) {
+		return vocId == null ? null : vocs.get(key(vocId));
+	}
+
+	/** Returns the term of the given vocabulary, or null when the vocabulary, the id or the term is missing. */
+	private static VocabularyTerm findTerm(final Vocabulary voc, final String id) {
+		return voc == null || id == null ? null : voc.getTerm(id);
+	}
+
+	/** Folds an id to the map key form; Locale.ROOT keeps the result independent of the JVM default locale. */
+	private static String key(final String id) {
+		return id.toLowerCase(java.util.Locale.ROOT);
+	}
+
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/VocabularyTerm.java
@ -0,0 +1,22 @@
+
+package eu.dnetlib.dhp.oa.graph.raw.common;
+
+/**
+ * An immutable vocabulary term, made of an id and a name. Two terms are equal when both their id and their name are
+ * equal.
+ */
+public class VocabularyTerm {
+
+	private final String id;
+	private final String name;
+
+	/**
+	 * @param id the term id
+	 * @param name the term name
+	 */
+	public VocabularyTerm(final String id, final String name) {
+		this.id = id;
+		this.name = name;
+	}
+
+	public String getId() {
+		return id;
+	}
+
+	public String getName() {
+		return name;
+	}
+
+	@Override
+	public boolean equals(final Object other) {
+		if (this == other) {
+			return true;
+		}
+		// exact class match keeps equals symmetric, since the class is open for extension
+		if (other == null || getClass() != other.getClass()) {
+			return false;
+		}
+		final VocabularyTerm that = (VocabularyTerm) other;
+		return java.util.Objects.equals(id, that.id) && java.util.Objects.equals(name, that.name);
+	}
+
+	@Override
+	public int hashCode() {
+		return java.util.Objects.hash(id, name);
+	}
+
+	@Override
+	public String toString() {
+		return "VocabularyTerm{id=" + id + ", name=" + name + "}";
+	}
+
+}
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/generate_entities_parameters.json
@ -18,22 +18,9 @@
 		"paramRequired": true
 	},
 	{
-		"paramName": "pgurl",
-		"paramLongName": "postgresUrl",
-		"paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb",
+		"paramName": "islookup",
+		"paramLongName": "islookup",
+		"paramDescription": "the url of the ISLookupService",
 		"paramRequired": true
-	},
-	{
-		"paramName": "pguser",
-		"paramLongName": "postgresUser",
-		"paramDescription": "postgres user",
-		"paramRequired": false
-	},
-	{
-		"paramName": "pgpasswd",
-		"paramLongName": "postgresPassword",
-		"paramDescription": "postgres password",
-		"paramRequired": false
 	}
-
 ]
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml
@ -34,6 +34,10 @@
            <name>mongoDb</name>
            <description>mongo database</description>
        </property>
+        <property>
+            <name>isLookupUrl</name>
+            <description>the address of the lookUp service</description>
+        </property>

        <property>
            <name>sparkDriverMemory</name>
@ -233,9 +237,7 @@
            </spark-opts>
            <arg>--sourcePaths</arg><arg>${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims</arg>
            <arg>--targetPath</arg><arg>${workingDir}/entities_claim</arg>
-            <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
-            <arg>--postgresUser</arg><arg>${postgresUser}</arg>
-            <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
+            <arg>--islookup</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="GenerateGraph_claims"/>
        <error to="Kill"/>
@ -282,9 +284,7 @@
            </spark-opts>
            <arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/oaf_records,${contentPath}/odf_records</arg>
            <arg>--targetPath</arg><arg>${workingDir}/entities</arg>
-            <arg>--postgresUrl</arg><arg>${postgresURL}</arg>
-            <arg>--postgresUser</arg><arg>${postgresUser}</arg>
-            <arg>--postgresPassword</arg><arg>${postgresPassword}</arg>
+            <arg>--islookup</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="GenerateGraph"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_step2/oozie_app/workflow.xml
@ -9,17 +9,10 @@
            <description>the temporary path to store entities before dispatching</description>
        </property>
        <property>
-            <name>postgresURL</name>
-            <description>the postgres URL to access to the database</description>
-        </property>
-        <property>
-            <name>postgresUser</name>
-            <description>the user postgres</description>
-        </property>
-        <property>
-            <name>postgresPassword</name>
-            <description>the password postgres</description>
+            <name>isLookupUrl</name>
+            <description>the address of the lookUp service</description>
        </property>
+ 
        <property>
            <name>sparkDriverMemory</name>
            <description>memory for driver process</description>
@ -62,9 +55,7 @@
            <arg>-mt</arg> <arg>yarn-cluster</arg>
            <arg>-s</arg><arg>${migrationPathStep1}/db_records,${migrationPathStep1}/oaf_records,${migrationPathStep1}/odf_records</arg>
            <arg>-t</arg><arg>${migrationPathStep2}/all_entities</arg>
-            <arg>-pgurl</arg><arg>${postgresURL}</arg>
-            <arg>-pguser</arg><arg>${postgresUser}</arg>
-            <arg>-pgpasswd</arg><arg>${postgresPassword}</arg>
+            <arg>--islookup</arg><arg>${isLookupUrl}</arg>
        </spark>
        <ok to="End"/>
        <error to="Kill"/>
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/xquery/load_vocabularies.xquery
@ -0,0 +1,5 @@
+(: Emits one string per vocabulary term: vocabulary code, vocabulary name, term code and term english name, separated by ' @=@ ' :)
+(: NOTE(review): the collection path starts with a blank; kept unchanged, confirm it is intended :)
+for $profile in collection(' /db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')
+for $entry in $profile//TERM
+	let $vocCode := $profile//VOCABULARY_NAME/@code
+	let $vocLabel := $profile//VOCABULARY_NAME/text()
+	return concat($vocCode,' @=@ ',$vocLabel,' @=@ ',$entry/@code,' @=@ ',$entry/@english_name)
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java
@ -9,7 +9,6 @@ import static org.mockito.Mockito.when;

 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
 import java.util.Optional;

 import org.apache.commons.io.IOUtils;
@ -20,6 +19,8 @@ import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;

+import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
+import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
 import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Dataset;
@ -34,18 +35,27 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 public class MappersTest {

 	@Mock
-	private Map<String, String> code2name;
+	private VocabularyGroup vocs;

 	@BeforeEach
 	public void setUp() throws Exception {
-		when(code2name.get(anyString())).thenAnswer(invocation -> invocation.getArgument(0));
+		when(vocs.getTermAsQualifier(anyString(), anyString()))
+			.thenAnswer(
+				invocation -> OafMapperUtils
+					.qualifier(
+						invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0),
+						invocation.getArgument(0)));
+
+		when(vocs.termExists(anyString(), anyString())).thenReturn(true);
+
 	}

 	@Test
 	void testPublication() throws IOException {
+
 		final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml"));

-		final List<Oaf> list = new OafToOafMapper(code2name).processMdRecord(xml);
+		final List<Oaf> list = new OafToOafMapper(vocs).processMdRecord(xml);

 		assertEquals(3, list.size());
 		assertTrue(list.get(0) instanceof Publication);
@ -86,6 +96,10 @@ public class MappersTest {
 		assertTrue(StringUtils.isNotBlank(p.getJournal().getIssnOnline()));
 		assertTrue(StringUtils.isNotBlank(p.getJournal().getName()));

+		assertTrue(p.getPid().size() > 0);
+		assertEquals(p.getPid().get(0).getValue(), "10.3897/oneeco.2.e13718");
+		assertEquals(p.getPid().get(0).getQualifier().getClassid(), "doi");
+
 		assertNotNull(p.getInstance());
 		assertTrue(p.getInstance().size() > 0);
 		p
@ -115,6 +129,7 @@ public class MappersTest {
 		assertTrue(StringUtils.isNotBlank(r1.getRelType()));
 		assertTrue(StringUtils.isNotBlank(r2.getRelType()));

+		// System.out.println(new ObjectMapper().writeValueAsString(p));
 		// System.out.println(new ObjectMapper().writeValueAsString(r1));
 		// System.out.println(new ObjectMapper().writeValueAsString(r2));
 	}
@ -123,7 +138,7 @@ public class MappersTest {
 	void testDataset() throws IOException {
 		final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml"));

-		final List<Oaf> list = new OdfToOafMapper(code2name).processMdRecord(xml);
+		final List<Oaf> list = new OdfToOafMapper(vocs).processMdRecord(xml);

 		assertEquals(3, list.size());
 		assertTrue(list.get(0) instanceof Dataset);
@ -205,7 +220,7 @@ public class MappersTest {
 	void testSoftware() throws IOException {
 		final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml"));

-		final List<Oaf> list = new OdfToOafMapper(code2name).processMdRecord(xml);
+		final List<Oaf> list = new OdfToOafMapper(vocs).processMdRecord(xml);

 		assertEquals(1, list.size());
 		assertTrue(list.get(0) instanceof Software);
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
@ -769,7 +769,7 @@ public class XmlRecordFactory implements Serializable {
 							XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
 				}
 				if (o.getLogourl() != null) {
-					metadata.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getLogourl().getValue()));
+					metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue()));
 				}

 				if (o.getEclegalbody() != null) {
@ -801,13 +801,13 @@ public class XmlRecordFactory implements Serializable {
 								.asXmlElement(
 									"echighereducation", o.getEchighereducation().getValue()));
 				}
-				if (o.getEcinternationalorganization() != null) {
+				if (o.getEcinternationalorganizationeurinterests() != null) {
 					metadata
 						.add(
 							XmlSerializationUtils
 								.asXmlElement(
 									"ecinternationalorganizationeurinterests",
-									o.getEcinternationalorganization().getValue()));
+									o.getEcinternationalorganizationeurinterests().getValue()));
 				}
 				if (o.getEcinternationalorganization() != null) {
 					metadata
--- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st
+++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/template/child.st
@ -1,3 +1,3 @@
 <$name$$if(hasId)$ objidentifier="$id$"$else$$endif$>
-	$metadata:{$it$}$
+	$metadata:{ it | $it$ }$
 </$name$>
--- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java
+++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java
@ -0,0 +1,47 @@
+package eu.dnetlib.dhp.oa.provision;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
+import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
+import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
+import org.apache.commons.io.IOUtils;
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.io.SAXReader;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Smoke test for {@link XmlRecordFactory}: builds the XML record of the {@link JoinedEntity} stored in the
+ * {@code joined_entity.json} fixture and checks that the output can be parsed as an XML document.
+ */
+public class XmlRecordFactoryTest {
+
+    private static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";
+
+    @Test
+    public void testXMLRecordFactory() throws IOException, DocumentException {
+
+        final String json;
+        try (java.io.InputStream in = getClass().getResourceAsStream("joined_entity.json")) {
+            // fail with a clear message, rather than a NullPointerException, when the fixture is missing
+            assertNotNull(in, "joined_entity.json not found on the test classpath");
+            // explicit charset: the one-argument IOUtils.toString decodes with the platform default
+            json = IOUtils.toString(in, java.nio.charset.StandardCharsets.UTF_8);
+        }
+
+        JoinedEntity je = new ObjectMapper().readValue(json, JoinedEntity.class);
+        assertNotNull(je);
+
+        ContextMapper contextMapper = new ContextMapper();
+
+        XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, otherDsTypeId);
+
+        String xml = xmlRecordFactory.build(je);
+
+        assertNotNull(xml);
+
+        // parsing fails the test if the factory produced malformed XML
+        Document doc = new SAXReader().read(new StringReader(xml));
+
+        assertNotNull(doc);
+
+        System.out.println(doc.asXML());
+
+    }
+}
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/joined_entity.json