diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 1a75deafc..196faf9ca 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -5,6 +5,7 @@ import java.io.StringReader; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.solr.Person; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; @@ -89,6 +90,8 @@ public class ProvisionModelSupport { r.setOrganization(mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) e)); } else if (e instanceof eu.dnetlib.dhp.schema.oaf.Project) { r.setProject(mapProject((eu.dnetlib.dhp.schema.oaf.Project) e, vocs)); + } else if (e instanceof eu.dnetlib.dhp.schema.oaf.Person) { + r.setPerson(mapPerson((eu.dnetlib.dhp.schema.oaf.Person) e)); } r .setLinks( @@ -185,6 +188,14 @@ public class ProvisionModelSupport { return ps; } + private static Person mapPerson(eu.dnetlib.dhp.schema.oaf.Person p) { + Person ps = new Person(); + ps.setFamilyName(p.getFamilyName()); + ps.setGivenName(p.getGivenName()); + ps.setAlternativeNames(p.getAlternativeNames()); + return ps; + } + private static Funding mapFunding(List fundingtree, VocabularyGroup vocs) { SAXReader reader = new SAXReader(); return Optional diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 15d3b6300..879911ccc 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -180,6 +180,7 @@ + @@ -378,6 +379,34 @@ + + + yarn + cluster + Join[relation.target = person.id] + eu.dnetlib.dhp.oa.provision.CreateRelatedEntitiesJob_phase1 + dhp-graph-provision-${projectVersion}.jar + + --executor-cores=${sparkExecutorCoresForJoining} + --executor-memory=${sparkExecutorMemoryForJoining} + --driver-memory=${sparkDriverMemoryForJoining} + --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=5000 + --conf spark.network.timeout=${sparkNetworkTimeout} + + --inputRelationsPath${workingDir}/relation + --inputEntityPath${inputGraphRootPath}/person + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Person + --outputPath${workingDir}/join_partial/person + + + + + @@ -388,6 +417,7 @@ + @@ -593,6 +623,35 @@ + + + yarn + cluster + Join[person.id = relatedEntity.source] + eu.dnetlib.dhp.oa.provision.CreateRelatedEntitiesJob_phase2 + dhp-graph-provision-${projectVersion}.jar + + --executor-cores=${sparkExecutorCoresForJoining} + --executor-memory=${sparkExecutorMemoryForJoining} + --driver-memory=${sparkDriverMemoryForJoining} + --conf spark.executor.memoryOverhead=${sparkExecutorMemoryForJoining} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=5000 + --conf spark.network.timeout=${sparkNetworkTimeout} + + --inputEntityPath${inputGraphRootPath}/person + --graphTableClassNameeu.dnetlib.dhp.schema.oaf.Person + --inputRelatedEntitiesPath${workingDir}/join_partial + --outputPath${workingDir}/join_entities/person + --numPartitions10000 + + + + +