Add resolvereletion step
This commit is contained in:
parent
9058dbb957
commit
004be2e97f
|
@ -72,6 +72,52 @@ def build_new_graph():
|
|||
"--format", "text"
|
||||
]).get_configuration(),
|
||||
kubernetes_conn_id="kubernetes_default"
|
||||
),
|
||||
|
||||
TriggerDagRunOperator(
|
||||
task_id="clean_graph",
|
||||
task_display_name="Clean Results",
|
||||
trigger_dag_id="clean_graph",
|
||||
wait_for_completion=True,
|
||||
conf={
|
||||
"S3_CONN_ID": "{{ dag_run.conf.get('S3_CONN_ID') }}",
|
||||
|
||||
"INPUT_PATH": "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["grouped"],
|
||||
"OUTPUT_PATH": "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["clean"],
|
||||
"WRKDIR_PATH": "{{ dag_run.conf.get('WRKDIR_PATH') }}/dedup",
|
||||
"IS_LOOKUP_URL": "{{ dag_run.conf.get('IS_LOOKUP_URL') }}"
|
||||
}
|
||||
),
|
||||
|
||||
SparkKubernetesOperator(
|
||||
task_id='resolverels',
|
||||
task_display_name="Resolve Relations",
|
||||
namespace='dnet-spark-jobs',
|
||||
template_spec=SparkConfigurator(
|
||||
name="resolverels-{{ ds }}-{{ task_instance.try_number }}",
|
||||
mainClass="eu.dnetlib.dhp.oa.graph.resolution.SparkResolveRelationById",
|
||||
jarLocation='s3a://binaries/dhp-shade-package-1.2.5-SNAPSHOT.jar',
|
||||
arguments=["--graphBasePath", "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["clean"],
|
||||
"--targetPath", "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["resolve"],
|
||||
"--relationPath", "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["consistency"]
|
||||
]).get_configuration(),
|
||||
kubernetes_conn_id="kubernetes_default"
|
||||
),
|
||||
|
||||
SparkKubernetesOperator(
|
||||
task_id='copyresolveents',
|
||||
task_display_name="Copy entities to Resolved Graph",
|
||||
namespace='dnet-spark-jobs',
|
||||
template_spec=SparkConfigurator(
|
||||
name="copyresolveents-{{ ds }}-{{ task_instance.try_number }}",
|
||||
mainClass="eu.dnetlib.dhp.oa.merge.CopyEntitiesSparkJob",
|
||||
jarLocation='s3a://binaries/dhp-shade-package-1.2.5-SNAPSHOT.jar',
|
||||
arguments=["--graphInputPath", "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["clean"],
|
||||
"--outputPath", "{{ dag_run.conf.get('GRAPH_PATH') }}/" + dag_utils.BUILD_PHASES["resolve"],
|
||||
"--entities", ",".join([item for item in dag_utils.GRAPH_ENTITIES if item != "relation"]),
|
||||
"--format", "text"
|
||||
]).get_configuration(),
|
||||
kubernetes_conn_id="kubernetes_default"
|
||||
)
|
||||
|
||||
# , TriggerDagRunOperator(
|
||||
|
|
|
@ -5,6 +5,7 @@ BUILD_PHASES = {
|
|||
"raw": "01_graph_raw",
|
||||
"grouped": "02_graph_grouped",
|
||||
"clean": "03_graph_cleaned",
|
||||
"resolve": "04_graph_resolved",
|
||||
|
||||
"inference": "05_graph_inferred",
|
||||
"dedup": "06_graph_dedup",
|
||||
|
|
Loading…
Reference in New Issue