forked from D-Net/dnet-hadoop
[openorgs dedup] fixed workflow parameter declarations. Introduced support for resuming the execution from intermediate steps
This commit is contained in:
parent
d0d477cca3
commit
815b9f4d56
|
@ -154,7 +154,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction {
|
|||
(FlatMapFunction<ConnectedComponent, Relation>) cc -> ccToMergeRel(cc, dedupConf),
|
||||
Encoders.bean(Relation.class));
|
||||
|
||||
mergeRels.write().mode(SaveMode.Append).parquet(mergeRelPath);
|
||||
mergeRels.write().mode(SaveMode.Overwrite).parquet(mergeRelPath);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ public class SparkCreateSimRels extends AbstractSparkAction {
|
|||
.rdd(),
|
||||
Encoders.bean(Relation.class));
|
||||
|
||||
saveParquet(simRels, outputPath, SaveMode.Append);
|
||||
saveParquet(simRels, outputPath, SaveMode.Overwrite);
|
||||
|
||||
log.info("Generated " + simRels.count() + " Similarity Relations");
|
||||
|
||||
|
|
|
@ -69,10 +69,6 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
|
|||
.map(Integer::valueOf)
|
||||
.orElse(NUM_CONNECTIONS);
|
||||
|
||||
final String apiUrl = Optional
|
||||
.ofNullable(parser.get("apiUrl"))
|
||||
.orElse("");
|
||||
|
||||
final String dbUrl = parser.get("dbUrl");
|
||||
final String dbTable = parser.get("dbTable");
|
||||
final String dbUser = parser.get("dbUser");
|
||||
|
@ -83,7 +79,6 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
|
|||
log.info("actionSetId: '{}'", actionSetId);
|
||||
log.info("workingPath: '{}'", workingPath);
|
||||
log.info("numPartitions: '{}'", numConnections);
|
||||
log.info("apiUrl: '{}'", apiUrl);
|
||||
log.info("dbUrl: '{}'", dbUrl);
|
||||
log.info("dbUser: '{}'", dbUser);
|
||||
log.info("table: '{}'", dbTable);
|
||||
|
@ -106,10 +101,6 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
|
|||
.write()
|
||||
.mode(SaveMode.Append)
|
||||
.jdbc(dbUrl, dbTable, connectionProperties);
|
||||
|
||||
if (!apiUrl.isEmpty())
|
||||
updateSimRels(apiUrl);
|
||||
|
||||
}
|
||||
|
||||
public static Dataset<OrgSimRel> createNewOrgs(
|
||||
|
@ -198,18 +189,6 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction {
|
|||
|
||||
}
|
||||
|
||||
private static String updateSimRels(final String apiUrl) throws IOException {
|
||||
|
||||
log.info("Updating simrels on the portal");
|
||||
|
||||
final HttpGet req = new HttpGet(apiUrl);
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
return IOUtils.toString(response.getEntity().getContent());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean filterRels(Relation rel, String entityType) {
|
||||
|
||||
switch (entityType) {
|
||||
|
|
|
@ -12,6 +12,22 @@
|
|||
<name>actionSetId</name>
|
||||
<description>id of the actionSet</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>apiUrl</name>
|
||||
<description>OpenOrgs API to finalise the suggestions import procedure</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dbUrl</name>
|
||||
<description>jdbc URL of the OpenOrgs database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dbUser</name>
|
||||
<description>username to access the OpenOrgs database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>dbPwd</name>
|
||||
<description>password to access the OpenOrgs database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>workingPath</name>
|
||||
<description>path for the working directory</description>
|
||||
|
@ -75,7 +91,17 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="resetOrgSimRels"/>
|
||||
<start to="resume_from"/>
|
||||
|
||||
<decision name="resume_from">
|
||||
<switch>
|
||||
<case to="CreateSimRels">${wf:conf('resumeFrom') eq 'CreateSimRels'}</case>
|
||||
<case to="CreateMergeRels">${wf:conf('resumeFrom') eq 'CreateMergeRels'}</case>
|
||||
<case to="PrepareOrgRels">${wf:conf('resumeFrom') eq 'PrepareOrgRels'}</case>
|
||||
<case to="update_openorgs">${wf:conf('resumeFrom') eq 'update_openorgs'}</case>
|
||||
<default to="resetOrgSimRels"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
|
@ -83,8 +109,8 @@
|
|||
|
||||
<action name="resetOrgSimRels">
|
||||
<fs>
|
||||
<delete path="${workingPath}/${actionSetIdOpenorgs}/organization_simrel"/>
|
||||
<delete path="${workingPath}/${actionSetIdOpenorgs}/organization_mergerel"/>
|
||||
<delete path="${workingPath}/${actionSetId}/organization_simrel"/>
|
||||
<delete path="${workingPath}/${actionSetId}/organization_mergerel"/>
|
||||
</fs>
|
||||
<ok to="CreateSimRels"/>
|
||||
<error to="Kill"/>
|
||||
|
@ -109,7 +135,7 @@
|
|||
</spark-opts>
|
||||
<arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetIdOpenorgs}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--numPartitions</arg><arg>1000</arg>
|
||||
</spark>
|
||||
|
@ -138,7 +164,7 @@
|
|||
<arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetIdOpenorgs}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--numPartitions</arg><arg>1000</arg>
|
||||
</spark>
|
||||
<ok to="CreateMergeRels"/>
|
||||
|
@ -165,7 +191,7 @@
|
|||
<arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetIdOpenorgs}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--cutConnectedComponent</arg><arg>${cutConnectedComponent}</arg>
|
||||
</spark>
|
||||
<ok to="PrepareOrgRels"/>
|
||||
|
@ -192,7 +218,7 @@
|
|||
<arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetIdOpenorgs}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--dbUrl</arg><arg>${dbUrl}</arg>
|
||||
<arg>--dbTable</arg><arg>${dbTable}</arg>
|
||||
<arg>--dbUser</arg><arg>${dbUser}</arg>
|
||||
|
@ -223,14 +249,24 @@
|
|||
<arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
|
||||
<arg>--workingPath</arg><arg>${workingPath}</arg>
|
||||
<arg>--isLookUpUrl</arg><arg>${isLookUpUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetIdOpenorgs}</arg>
|
||||
<arg>--apiUrl</arg><arg>${apiUrl}</arg>
|
||||
<arg>--actionSetId</arg><arg>${actionSetId}</arg>
|
||||
<arg>--dbUrl</arg><arg>${dbUrl}</arg>
|
||||
<arg>--dbTable</arg><arg>${dbTable}</arg>
|
||||
<arg>--dbUser</arg><arg>${dbUser}</arg>
|
||||
<arg>--dbPwd</arg><arg>${dbPwd}</arg>
|
||||
<arg>--numConnections</arg><arg>20</arg>
|
||||
</spark>
|
||||
<ok to="update_openorgs"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="update_openorgs">
|
||||
<shell xmlns="uri:oozie:shell-action:0.1">
|
||||
<job-tracker>${jobTracker}</job-tracker>
|
||||
<name-node>${nameNode}</name-node>
|
||||
<exec>/usr/bin/curl</exec>
|
||||
<argument>${apiUrl}</argument>
|
||||
</shell>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
|
Loading…
Reference in New Issue