[graph provision] added person to the graph2hive workflow

2024-08-05 09:35:07 +02:00 · 2024-08-05 09:35:07 +02:00 · 0bf76f2a34
parent 975d44cac7
commit 0bf76f2a34
1 changed files with 30 additions and 0 deletions
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/workflow.xml
@ -102,6 +102,7 @@
        <path start="import_datasource"/>
        <path start="import_organization"/>
        <path start="import_project"/>
        <path start="import_person"/>
        <path start="import_relation"/>
    </fork>
@ -308,6 +309,35 @@
        <error to="Kill"/>
    </action>
    <!-- Action "import_person": submits the Spark job GraphHiveTableImporterJob on YARN in
         cluster mode, pointing it at the "person" folder under the inputPath property and at
         the Hive database named by the hiveDbName property.
         This action is started as one of the paths of a fork defined earlier in this workflow.
         NOTE(review): presumably the job loads the Person records into a Hive table of the
         target database; confirm against GraphHiveTableImporterJob. -->
    <action name="import_person">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
            <mode>cluster</mode>
            <name>Import table person</name>
            <class>eu.dnetlib.dhp.oa.graph.hive.GraphHiveTableImporterJob</class>
            <jar>dhp-graph-mapper-${projectVersion}.jar</jar>
            <!-- Options passed to spark-submit. Two points worth knowing when tuning:
                 * spark.executor.memoryOverhead is set to the same property as the executor
                   memory (sparkExecutorMemory).
                   NOTE(review): on YARN this should make each executor container request about
                   twice that amount; confirm this is intended.
                 * spark.sql.shuffle.partitions is hard-coded to 1000 rather than taken from a
                   workflow property. -->
            <spark-opts>
                --executor-memory=${sparkExecutorMemory}
                --executor-cores=${sparkExecutorCores}
                --driver-memory=${sparkDriverMemory}
                --conf spark.executor.memoryOverhead=${sparkExecutorMemory}
                --conf spark.extraListeners=${spark2ExtraListeners}
                --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
                --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
                --conf spark.sql.shuffle.partitions=1000
            </spark-opts>
            <!-- Job arguments, given as flag/value pairs (one pair per line). -->
            <arg>--inputPath</arg><arg>${inputPath}/person</arg>
            <arg>--hiveDbName</arg><arg>${hiveDbName}</arg>
            <!-- Fully qualified name of the schema class handed to the job for this input. -->
            <arg>--className</arg><arg>eu.dnetlib.dhp.schema.oaf.Person</arg>
            <arg>--hiveMetastoreUris</arg><arg>${hiveMetastoreUris}</arg>
            <!-- Hard-coded to 1000, the same value as spark.sql.shuffle.partitions above. -->
            <arg>--numPartitions</arg><arg>1000</arg>
        </spark>
        <!-- On success continue to the "join_import" node; on failure go to the "Kill" node. -->
        <ok to="join_import"/>
        <error to="Kill"/>
    </action>
    <action name="import_relation">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>