2021-08-23 11:58:35 +02:00
2 changed files with 73 additions and 43 deletions
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java
@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
 import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
 import eu.dnetlib.dhp.schema.common.EntityType;
-import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.Datasource;
+import eu.dnetlib.dhp.schema.oaf.Field;
+import eu.dnetlib.dhp.schema.oaf.OafEntity;
+import eu.dnetlib.dhp.schema.oaf.Organization;
+import eu.dnetlib.dhp.schema.oaf.Project;
+import eu.dnetlib.dhp.schema.oaf.Relation;
+import eu.dnetlib.dhp.schema.oaf.Result;
+import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits;
 import scala.Tuple2;

 /**
- * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type
- * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join
- * (R.target = T_i.id) save the tuples (R_i, T_i)
+ * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity
+ * T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i)
 */
 public class CreateRelatedEntitiesJob_phase1 {

@ -42,68 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 {

 	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

-	public static void main(String[] args) throws Exception {
+	public static void main(final String[] args) throws Exception {

-		String jsonConfiguration = IOUtils
+		final String jsonConfiguration = IOUtils
 			.toString(
 				PrepareRelationsJob.class
-					.getResourceAsStream(
-						"/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"));
+					.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"));
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
 		parser.parseArgument(args);

-		Boolean isSparkSessionManaged = Optional
+		final Boolean isSparkSessionManaged = Optional
 			.ofNullable(parser.get("isSparkSessionManaged"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.TRUE);
 		log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

-		String inputRelationsPath = parser.get("inputRelationsPath");
+		final String inputRelationsPath = parser.get("inputRelationsPath");
 		log.info("inputRelationsPath: {}", inputRelationsPath);

-		String inputEntityPath = parser.get("inputEntityPath");
+		final String inputEntityPath = parser.get("inputEntityPath");
 		log.info("inputEntityPath: {}", inputEntityPath);

-		String outputPath = parser.get("outputPath");
+		final String outputPath = parser.get("outputPath");
 		log.info("outputPath: {}", outputPath);

-		String graphTableClassName = parser.get("graphTableClassName");
+		final String graphTableClassName = parser.get("graphTableClassName");
 		log.info("graphTableClassName: {}", graphTableClassName);

-		Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);
+		final Class<? extends OafEntity> entityClazz = (Class<? extends OafEntity>) Class.forName(graphTableClassName);

-		SparkConf conf = new SparkConf();
+		final SparkConf conf = new SparkConf();
 		conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
 		conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());

-		runWithSparkSession(
-			conf,
-			isSparkSessionManaged,
-			spark -> {
-				removeOutputDir(spark, outputPath);
-				joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath);
-			});
+		runWithSparkSession(conf, isSparkSessionManaged, spark -> {
+			removeOutputDir(spark, outputPath);
+			joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath);
+		});
 	}

 	private static <E extends OafEntity> void joinRelationEntity(
-		SparkSession spark,
-		String inputRelationsPath,
-		String inputEntityPath,
-		Class<E> clazz,
-		String outputPath) {
+		final SparkSession spark,
+		final String inputRelationsPath,
+		final String inputEntityPath,
+		final Class<E> clazz,
+		final String outputPath) {

-		Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
+		final Dataset<Tuple2<String, Relation>> relsByTarget = readPathRelation(spark, inputRelationsPath)
 			.map(
 				(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getTarget(),
 					r),
 				Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class)))
 			.cache();

-		Dataset<Tuple2<String, RelatedEntity>> entities = readPathEntity(spark, inputEntityPath, clazz)
+		final Dataset<Tuple2<String, RelatedEntity>> entities = readPathEntity(spark, inputEntityPath, clazz)
 			.filter("dataInfo.invisible == false")
 			.map(
 				(MapFunction<E, Tuple2<String, RelatedEntity>>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)),
-				Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class)))
+				Encoders
+					.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class)))
 			.cache();

 		relsByTarget
@ -118,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 {
 	}

 	private static <E extends OafEntity> Dataset<E> readPathEntity(
-		SparkSession spark, String inputEntityPath, Class<E> entityClazz) {
+		final SparkSession spark,
+		final String inputEntityPath,
+		final Class<E> entityClazz) {

 		log.info("Reading Graph table from: {}", inputEntityPath);
 		return spark
@ -129,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 {
 				Encoders.bean(entityClazz));
 	}

-	public static <E extends OafEntity> RelatedEntity asRelatedEntity(E entity, Class<E> clazz) {
+	public static <E extends OafEntity> RelatedEntity asRelatedEntity(final E entity, final Class<E> clazz) {

 		final RelatedEntity re = new RelatedEntity();
 		re.setId(entity.getId());
@ -143,7 +148,7 @@ public class CreateRelatedEntitiesJob_phase1 {
 			case dataset:
 			case otherresearchproduct:
 			case software:
-				Result result = (Result) entity;
+				final Result result = (Result) entity;

 				if (result.getTitle() != null && !result.getTitle().isEmpty()) {
 					final StructuredProperty title = result.getTitle().stream().findFirst().get();
@ -170,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 {

 				break;
 			case datasource:
-				Datasource d = (Datasource) entity;
+				final Datasource d = (Datasource) entity;

 				re.setOfficialname(getValue(d.getOfficialname()));
 				re.setWebsiteurl(getValue(d.getWebsiteurl()));
 				re.setDatasourcetype(d.getDatasourcetype());
+				re.setDatasourcetypeui(d.getDatasourcetypeui());
 				re.setOpenairecompatibility(d.getOpenairecompatibility());

 				break;
 			case organization:
-				Organization o = (Organization) entity;
+				final Organization o = (Organization) entity;

 				re.setLegalname(getValue(o.getLegalname()));
 				re.setLegalshortname(getValue(o.getLegalshortname()));
@ -187,14 +193,14 @@ public class CreateRelatedEntitiesJob_phase1 {
 				re.setWebsiteurl(getValue(o.getWebsiteurl()));
 				break;
 			case project:
-				Project p = (Project) entity;
+				final Project p = (Project) entity;

 				re.setProjectTitle(getValue(p.getTitle()));
 				re.setCode(getValue(p.getCode()));
 				re.setAcronym(getValue(p.getAcronym()));
 				re.setContracttype(p.getContracttype());

-				List<Field<String>> f = p.getFundingtree();
+				final List<Field<String>> f = p.getFundingtree();
 				if (!f.isEmpty()) {
 					re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList()));
 				}
@ -203,11 +209,11 @@ public class CreateRelatedEntitiesJob_phase1 {
 		return re;
 	}

-	private static String getValue(Field<String> field) {
+	private static String getValue(final Field<String> field) {
 		return getFieldValueWithDefault(field, "");
 	}

-	private static <T> T getFieldValueWithDefault(Field<T> f, T defaultValue) {
+	private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) {
 		return Optional
 			.ofNullable(f)
 			.filter(Objects::nonNull)
@ -216,21 +222,21 @@ public class CreateRelatedEntitiesJob_phase1 {
 	}

 	/**
-	 * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text
-	 * file,
+	 * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file,
 	 *
 	 * @param spark
 	 * @param relationPath
 	 * @return the Dataset<SortableRelation> containing all the relationships
 	 */
 	private static Dataset<Relation> readPathRelation(
-		SparkSession spark, final String relationPath) {
+		final SparkSession spark,
+		final String relationPath) {

 		log.info("Reading relations from: {}", relationPath);
 		return spark.read().load(relationPath).as(Encoders.bean(Relation.class));
 	}

-	private static void removeOutputDir(SparkSession spark, String path) {
+	private static void removeOutputDir(final SparkSession spark, final String path) {
 		HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
 	}
 }
--- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
+++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java
@ -735,6 +735,30 @@ public class XmlRecordFactory implements Serializable {
 								.collect(Collectors.toList()));
 				}

+				if (ds.getJurisdiction() != null) {
+					metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction()));
+				}
+
+				if (ds.getThematic() != null) {
+					metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString()));
+				}
+
+				if (ds.getKnowledgegraph() != null) {
+					metadata
+						.add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString()));
+				}
+
+				if (ds.getContentpolicies() != null) {
+					metadata
+						.addAll(
+							ds
+								.getContentpolicies()
+								.stream()
+								.filter(Objects::nonNull)
+								.map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q))
+								.collect(Collectors.toList()));
+				}
+
 				break;
 			case organization:
 				final Organization o = (Organization) entity;