ES wf properties

This commit is contained in:
Michele Artini 2020-12-14 12:02:33 +01:00
parent d03756c962
commit a203aee32a
9 changed files with 196 additions and 14 deletions

View File

@ -55,6 +55,18 @@ public class IndexEventSubsetJob {
final String indexHost = parser.get("esHost"); final String indexHost = parser.get("esHost");
log.info("indexHost: {}", indexHost); log.info("indexHost: {}", indexHost);
final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
final String esNodesWanOnly = parser.get("esNodesWanOnly");
log.info("esNodesWanOnly: {}", esNodesWanOnly);
final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic")); final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
log.info("maxEventsForTopic: {}", maxEventsForTopic); log.info("maxEventsForTopic: {}", maxEventsForTopic);
@ -86,10 +98,10 @@ public class IndexEventSubsetJob {
esCfg.put("es.index.auto.create", "false"); esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost); esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", "8"); esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
esCfg.put("es.batch.write.retry.wait", "60s"); esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
esCfg.put("es.batch.size.entries", "200"); esCfg.put("es.batch.size.entries", esBatchSizeEntries);
esCfg.put("es.nodes.wan.only", "true"); esCfg.put("es.nodes.wan.only", esNodesWanOnly);
log.info("*** Start indexing"); log.info("*** Start indexing");
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg); JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);

View File

@ -63,6 +63,18 @@ public class IndexNotificationsJob {
final String indexHost = parser.get("esHost"); final String indexHost = parser.get("esHost");
log.info("indexHost: {}", indexHost); log.info("indexHost: {}", indexHost);
final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
final String esNodesWanOnly = parser.get("esNodesWanOnly");
log.info("esNodesWanOnly: {}", esNodesWanOnly);
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl"); final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl); log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
@ -92,10 +104,10 @@ public class IndexNotificationsJob {
esCfg.put("es.index.auto.create", "false"); esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost); esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", "8"); esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
esCfg.put("es.batch.write.retry.wait", "60s"); esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
esCfg.put("es.batch.size.entries", "200"); esCfg.put("es.batch.size.entries", esBatchSizeEntries);
esCfg.put("es.nodes.wan.only", "true"); esCfg.put("es.nodes.wan.only", esNodesWanOnly);
log.info("*** Start indexing"); log.info("*** Start indexing");
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg); JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);

View File

@ -45,6 +45,18 @@ public class IndexOnESJob {
final String indexHost = parser.get("esHost"); final String indexHost = parser.get("esHost");
log.info("indexHost: {}", indexHost); log.info("indexHost: {}", indexHost);
final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
final String esNodesWanOnly = parser.get("esNodesWanOnly");
log.info("esNodesWanOnly: {}", esNodesWanOnly);
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
final JavaRDD<String> inputRdd = ClusterUtils final JavaRDD<String> inputRdd = ClusterUtils
@ -53,15 +65,13 @@ public class IndexOnESJob {
.javaRDD(); .javaRDD();
final Map<String, String> esCfg = new HashMap<>(); final Map<String, String> esCfg = new HashMap<>();
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
esCfg.put("es.index.auto.create", "false"); esCfg.put("es.index.auto.create", "false");
esCfg.put("es.nodes", indexHost); esCfg.put("es.nodes", indexHost);
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
esCfg.put("es.batch.write.retry.count", "8"); esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
esCfg.put("es.batch.write.retry.wait", "60s"); esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
esCfg.put("es.batch.size.entries", "200"); esCfg.put("es.batch.size.entries", esBatchSizeEntries);
esCfg.put("es.nodes.wan.only", "true"); esCfg.put("es.nodes.wan.only", esNodesWanOnly);
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg); JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
} }

View File

@ -41,6 +41,26 @@
<name>esIndexHost</name> <name>esIndexHost</name>
<description>the elasticsearch host</description> <description>the elasticsearch host</description>
</property> </property>
<property>
<name>esBatchWriteRetryCount</name>
<value>8</value>
<description>number of retries for a rejected ES bulk write (es.batch.write.retry.count)</description>
</property>
<property>
<name>esBatchWriteRetryWait</name>
<value>60s</value>
<description>time to wait between ES bulk write retries (es.batch.write.retry.wait)</description>
</property>
<property>
<name>esBatchSizeEntries</name>
<value>200</value>
<description>max number of entries per ES bulk write (es.batch.size.entries)</description>
</property>
<property>
<name>esNodesWanOnly</name>
<value>true</value>
<description>if true, connect only through the declared ES nodes, without node discovery (es.nodes.wan.only)</description>
</property>
<property> <property>
<name>maxIndexedEventsForDsAndTopic</name> <name>maxIndexedEventsForDsAndTopic</name>
<description>the max number of events for each couple (ds/topic)</description> <description>the max number of events for each couple (ds/topic)</description>
@ -478,6 +498,10 @@
<arg>--outputDir</arg><arg>${outputDir}</arg> <arg>--outputDir</arg><arg>${outputDir}</arg>
<arg>--index</arg><arg>${esEventIndexName}</arg> <arg>--index</arg><arg>${esEventIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg> <arg>--esHost</arg><arg>${esIndexHost}</arg>
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg> <arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg> <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark> </spark>
@ -505,6 +529,10 @@
<arg>--outputDir</arg><arg>${outputDir}</arg> <arg>--outputDir</arg><arg>${outputDir}</arg>
<arg>--index</arg><arg>${esNotificationsIndexName}</arg> <arg>--index</arg><arg>${esNotificationsIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg> <arg>--esHost</arg><arg>${esIndexHost}</arg>
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg> <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark> </spark>
<ok to="stats"/> <ok to="stats"/>

View File

@ -16,5 +16,29 @@
"paramLongName": "esHost", "paramLongName": "esHost",
"paramDescription": "the ES host", "paramDescription": "the ES host",
"paramRequired": true "paramRequired": true
},
{
"paramName": "esBatchWriteRetryCount",
"paramLongName": "esBatchWriteRetryCount",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esBatchWriteRetryWait",
"paramLongName": "esBatchWriteRetryWait",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esBatchSizeEntries",
"paramLongName": "esBatchSizeEntries",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esNodesWanOnly",
"paramLongName": "esNodesWanOnly",
"paramDescription": "an ES configuration property",
"paramRequired": true
} }
] ]

View File

@ -16,7 +16,31 @@
"paramLongName": "esHost", "paramLongName": "esHost",
"paramDescription": "the ES host", "paramDescription": "the ES host",
"paramRequired": true "paramRequired": true
},
{
"paramName": "esBatchWriteRetryCount",
"paramLongName": "esBatchWriteRetryCount",
"paramDescription": "an ES configuration property",
"paramRequired": true
}, },
{
"paramName": "esBatchWriteRetryWait",
"paramLongName": "esBatchWriteRetryWait",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esBatchSizeEntries",
"paramLongName": "esBatchSizeEntries",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esNodesWanOnly",
"paramLongName": "esNodesWanOnly",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{ {
"paramName": "n", "paramName": "n",
"paramLongName": "maxEventsForTopic", "paramLongName": "maxEventsForTopic",

View File

@ -17,6 +17,30 @@
"paramDescription": "the ES host", "paramDescription": "the ES host",
"paramRequired": true "paramRequired": true
}, },
{
"paramName": "esBatchWriteRetryCount",
"paramLongName": "esBatchWriteRetryCount",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esBatchWriteRetryWait",
"paramLongName": "esBatchWriteRetryWait",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esBatchSizeEntries",
"paramLongName": "esBatchSizeEntries",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{
"paramName": "esNodesWanOnly",
"paramLongName": "esNodesWanOnly",
"paramDescription": "an ES configuration property",
"paramRequired": true
},
{ {
"paramName": "broker", "paramName": "broker",
"paramLongName": "brokerApiBaseUrl", "paramLongName": "brokerApiBaseUrl",

View File

@ -36,6 +36,26 @@
<name>esIndexHost</name> <name>esIndexHost</name>
<description>the elasticsearch host</description> <description>the elasticsearch host</description>
</property> </property>
<property>
<name>esBatchWriteRetryCount</name>
<value>8</value>
<description>number of retries for a rejected ES bulk write (es.batch.write.retry.count)</description>
</property>
<property>
<name>esBatchWriteRetryWait</name>
<value>60s</value>
<description>time to wait between ES bulk write retries (es.batch.write.retry.wait)</description>
</property>
<property>
<name>esBatchSizeEntries</name>
<value>200</value>
<description>max number of entries per ES bulk write (es.batch.size.entries)</description>
</property>
<property>
<name>esNodesWanOnly</name>
<value>true</value>
<description>if true, connect only through the declared ES nodes, without node discovery (es.nodes.wan.only)</description>
</property>
<property> <property>
<name>maxIndexedEventsForDsAndTopic</name> <name>maxIndexedEventsForDsAndTopic</name>
<description>the max number of events for each couple (ds/topic)</description> <description>the max number of events for each couple (ds/topic)</description>
@ -125,6 +145,10 @@
<arg>--outputDir</arg><arg>${outputDir}</arg> <arg>--outputDir</arg><arg>${outputDir}</arg>
<arg>--index</arg><arg>${esNotificationsIndexName}</arg> <arg>--index</arg><arg>${esNotificationsIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg> <arg>--esHost</arg><arg>${esIndexHost}</arg>
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg> <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark> </spark>
<ok to="End"/> <ok to="End"/>

View File

@ -13,6 +13,26 @@
<name>esIndexHost</name> <name>esIndexHost</name>
<description>the elasticsearch host</description> <description>the elasticsearch host</description>
</property> </property>
<property>
<name>esBatchWriteRetryCount</name>
<value>8</value>
<description>number of retries for a rejected ES bulk write (es.batch.write.retry.count)</description>
</property>
<property>
<name>esBatchWriteRetryWait</name>
<value>60s</value>
<description>time to wait between ES bulk write retries (es.batch.write.retry.wait)</description>
</property>
<property>
<name>esBatchSizeEntries</name>
<value>200</value>
<description>max number of entries per ES bulk write (es.batch.size.entries)</description>
</property>
<property>
<name>esNodesWanOnly</name>
<value>true</value>
<description>if true, connect only through the declared ES nodes, without node discovery (es.nodes.wan.only)</description>
</property>
<property> <property>
<name>maxIndexedEventsForDsAndTopic</name> <name>maxIndexedEventsForDsAndTopic</name>
<description>the max number of events for each couple (ds/topic)</description> <description>the max number of events for each couple (ds/topic)</description>
@ -102,6 +122,10 @@
<arg>--outputDir</arg><arg>${outputDir}</arg> <arg>--outputDir</arg><arg>${outputDir}</arg>
<arg>--index</arg><arg>${esEventIndexName}</arg> <arg>--index</arg><arg>${esEventIndexName}</arg>
<arg>--esHost</arg><arg>${esIndexHost}</arg> <arg>--esHost</arg><arg>${esIndexHost}</arg>
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg> <arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg> <arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
</spark> </spark>