From 3e2a2d6e71c4cb9820c858ed114cea9b9ac25021 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:56:55 +0200 Subject: [PATCH] added new fields in xml --- .../CreateRelatedEntitiesJob_phase1.java | 92 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 24 +++++ 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bdd..b2abbc1562 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; /** - * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type - * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join - * (R.target = T_i.id) save the tuples (R_i, T_i) + * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity + * T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i) */ public class CreateRelatedEntitiesJob_phase1 { @@ -42,68 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { - String jsonConfiguration = IOUtils + final String jsonConfiguration = IOUtils .toString( PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputRelationsPath = parser.get("inputRelationsPath"); + final String inputRelationsPath = parser.get("inputRelationsPath"); log.info("inputRelationsPath: {}", inputRelationsPath); - String inputEntityPath = parser.get("inputEntityPath"); + final String inputEntityPath = parser.get("inputEntityPath"); log.info("inputEntityPath: {}", inputEntityPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String graphTableClassName = parser.get("graphTableClassName"); + final String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + final Class entityClazz = (Class) Class.forName(graphTableClassName); - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); + }); } private static void joinRelationEntity( - SparkSession spark, - String inputRelationsPath, - String inputEntityPath, - Class clazz, - String outputPath) { + final SparkSession spark, + final String inputRelationsPath, + final String inputEntityPath, + final Class clazz, + final String outputPath) { - Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) + final Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) .map( (MapFunction>) r -> new Tuple2<>(r.getTarget(), r), Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) .cache(); relsByTarget @@ -118,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 { } private static Dataset readPathEntity( - SparkSession spark, String inputEntityPath, Class entityClazz) { + final SparkSession spark, + final String inputEntityPath, + final Class entityClazz) { log.info("Reading Graph table from: {}", inputEntityPath); return spark @@ -129,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.bean(entityClazz)); } - public static RelatedEntity asRelatedEntity(E entity, Class clazz) { + public static RelatedEntity asRelatedEntity(final E entity, final Class clazz) { final RelatedEntity re = new RelatedEntity(); re.setId(entity.getId()); @@ -143,7 +148,7 @@ public class CreateRelatedEntitiesJob_phase1 { case dataset: case otherresearchproduct: case software: - Result result = (Result) entity; + final Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { final StructuredProperty title = result.getTitle().stream().findFirst().get(); @@ -170,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 { break; case datasource: - Datasource d = (Datasource) entity; + final Datasource d = (Datasource) entity; re.setOfficialname(getValue(d.getOfficialname())); re.setWebsiteurl(getValue(d.getWebsiteurl())); re.setDatasourcetype(d.getDatasourcetype()); + re.setDatasourcetypeui(d.getDatasourcetypeui()); re.setOpenairecompatibility(d.getOpenairecompatibility()); break; case organization: - Organization o = (Organization) entity; + final Organization o = (Organization) entity; re.setLegalname(getValue(o.getLegalname())); re.setLegalshortname(getValue(o.getLegalshortname())); @@ -187,14 +193,14 @@ public class CreateRelatedEntitiesJob_phase1 { re.setWebsiteurl(getValue(o.getWebsiteurl())); break; case project: - Project p = (Project) entity; + final Project p = (Project) entity; re.setProjectTitle(getValue(p.getTitle())); re.setCode(getValue(p.getCode())); re.setAcronym(getValue(p.getAcronym())); re.setContracttype(p.getContracttype()); - List> f = p.getFundingtree(); + final List> f = p.getFundingtree(); if (!f.isEmpty()) { re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); } @@ -203,11 +209,11 @@ public class CreateRelatedEntitiesJob_phase1 { return re; } - private static String getValue(Field field) { + private static String getValue(final Field field) { return getFieldValueWithDefault(field, ""); } - private static T getFieldValueWithDefault(Field f, T defaultValue) { + private static T getFieldValueWithDefault(final Field f, final T defaultValue) { return Optional .ofNullable(f) .filter(Objects::nonNull) @@ -216,21 +222,21 @@ public class CreateRelatedEntitiesJob_phase1 { } /** - * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file, * * @param spark * @param relationPath * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( - SparkSession spark, final String relationPath) { + final SparkSession spark, + final String relationPath) { log.info("Reading relations from: {}", relationPath); return spark.read().load(relationPath).as(Encoders.bean(Relation.class)); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 392d3cde6d..19300d77db 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -735,6 +735,30 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } + if (ds.getJurisdiction() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); + } + + if (ds.getThematic() != null) { + metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); + } + + if (ds.getKnowledgegraph() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString())); + } + + if (ds.getContentpolicies() != null) { + metadata + .addAll( + ds + .getContentpolicies() + .stream() + .filter(Objects::nonNull) + .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) + .collect(Collectors.toList())); + } + break; case organization: final Organization o = (Organization) entity;