forked from D-Net/dnet-hadoop
ES wf properties
This commit is contained in:
parent
1391341d06
commit
6f4d1a37f0
|
@ -55,6 +55,18 @@ public class IndexEventSubsetJob {
|
||||||
final String indexHost = parser.get("esHost");
|
final String indexHost = parser.get("esHost");
|
||||||
log.info("indexHost: {}", indexHost);
|
log.info("indexHost: {}", indexHost);
|
||||||
|
|
||||||
|
final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
|
||||||
|
log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
|
||||||
|
|
||||||
|
final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
|
||||||
|
log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
|
||||||
|
|
||||||
|
final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
|
||||||
|
log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
|
||||||
|
|
||||||
|
final String esNodesWanOnly = parser.get("esNodesWanOnly");
|
||||||
|
log.info("esNodesWanOnly: {}", esNodesWanOnly);
|
||||||
|
|
||||||
final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
|
final int maxEventsForTopic = NumberUtils.toInt(parser.get("maxEventsForTopic"));
|
||||||
log.info("maxEventsForTopic: {}", maxEventsForTopic);
|
log.info("maxEventsForTopic: {}", maxEventsForTopic);
|
||||||
|
|
||||||
|
@ -86,10 +98,10 @@ public class IndexEventSubsetJob {
|
||||||
esCfg.put("es.index.auto.create", "false");
|
esCfg.put("es.index.auto.create", "false");
|
||||||
esCfg.put("es.nodes", indexHost);
|
esCfg.put("es.nodes", indexHost);
|
||||||
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
||||||
esCfg.put("es.batch.write.retry.count", "8");
|
esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
|
||||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
|
||||||
esCfg.put("es.batch.size.entries", "200");
|
esCfg.put("es.batch.size.entries", esBatchSizeEntries);
|
||||||
esCfg.put("es.nodes.wan.only", "true");
|
esCfg.put("es.nodes.wan.only", esNodesWanOnly);
|
||||||
|
|
||||||
log.info("*** Start indexing");
|
log.info("*** Start indexing");
|
||||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||||
|
|
|
@ -63,6 +63,18 @@ public class IndexNotificationsJob {
|
||||||
final String indexHost = parser.get("esHost");
|
final String indexHost = parser.get("esHost");
|
||||||
log.info("indexHost: {}", indexHost);
|
log.info("indexHost: {}", indexHost);
|
||||||
|
|
||||||
|
final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
|
||||||
|
log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
|
||||||
|
|
||||||
|
final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
|
||||||
|
log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
|
||||||
|
|
||||||
|
final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
|
||||||
|
log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
|
||||||
|
|
||||||
|
final String esNodesWanOnly = parser.get("esNodesWanOnly");
|
||||||
|
log.info("esNodesWanOnly: {}", esNodesWanOnly);
|
||||||
|
|
||||||
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
final String brokerApiBaseUrl = parser.get("brokerApiBaseUrl");
|
||||||
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
log.info("brokerApiBaseUrl: {}", brokerApiBaseUrl);
|
||||||
|
|
||||||
|
@ -92,10 +104,10 @@ public class IndexNotificationsJob {
|
||||||
esCfg.put("es.index.auto.create", "false");
|
esCfg.put("es.index.auto.create", "false");
|
||||||
esCfg.put("es.nodes", indexHost);
|
esCfg.put("es.nodes", indexHost);
|
||||||
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
|
esCfg.put("es.mapping.id", "notificationId"); // THE PRIMARY KEY
|
||||||
esCfg.put("es.batch.write.retry.count", "8");
|
esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
|
||||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
|
||||||
esCfg.put("es.batch.size.entries", "200");
|
esCfg.put("es.batch.size.entries", esBatchSizeEntries);
|
||||||
esCfg.put("es.nodes.wan.only", "true");
|
esCfg.put("es.nodes.wan.only", esNodesWanOnly);
|
||||||
|
|
||||||
log.info("*** Start indexing");
|
log.info("*** Start indexing");
|
||||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||||
|
|
|
@ -45,6 +45,18 @@ public class IndexOnESJob {
|
||||||
final String indexHost = parser.get("esHost");
|
final String indexHost = parser.get("esHost");
|
||||||
log.info("indexHost: {}", indexHost);
|
log.info("indexHost: {}", indexHost);
|
||||||
|
|
||||||
|
final String esBatchWriteRetryCount = parser.get("esBatchWriteRetryCount");
|
||||||
|
log.info("esBatchWriteRetryCount: {}", esBatchWriteRetryCount);
|
||||||
|
|
||||||
|
final String esBatchWriteRetryWait = parser.get("esBatchWriteRetryWait");
|
||||||
|
log.info("esBatchWriteRetryWait: {}", esBatchWriteRetryWait);
|
||||||
|
|
||||||
|
final String esBatchSizeEntries = parser.get("esBatchSizeEntries");
|
||||||
|
log.info("esBatchSizeEntries: {}", esBatchSizeEntries);
|
||||||
|
|
||||||
|
final String esNodesWanOnly = parser.get("esNodesWanOnly");
|
||||||
|
log.info("esNodesWanOnly: {}", esNodesWanOnly);
|
||||||
|
|
||||||
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
final SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
|
||||||
|
|
||||||
final JavaRDD<String> inputRdd = ClusterUtils
|
final JavaRDD<String> inputRdd = ClusterUtils
|
||||||
|
@ -53,15 +65,13 @@ public class IndexOnESJob {
|
||||||
.javaRDD();
|
.javaRDD();
|
||||||
|
|
||||||
final Map<String, String> esCfg = new HashMap<>();
|
final Map<String, String> esCfg = new HashMap<>();
|
||||||
// esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54");
|
|
||||||
|
|
||||||
esCfg.put("es.index.auto.create", "false");
|
esCfg.put("es.index.auto.create", "false");
|
||||||
esCfg.put("es.nodes", indexHost);
|
esCfg.put("es.nodes", indexHost);
|
||||||
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
esCfg.put("es.mapping.id", "eventId"); // THE PRIMARY KEY
|
||||||
esCfg.put("es.batch.write.retry.count", "8");
|
esCfg.put("es.batch.write.retry.count", esBatchWriteRetryCount);
|
||||||
esCfg.put("es.batch.write.retry.wait", "60s");
|
esCfg.put("es.batch.write.retry.wait", esBatchWriteRetryWait);
|
||||||
esCfg.put("es.batch.size.entries", "200");
|
esCfg.put("es.batch.size.entries", esBatchSizeEntries);
|
||||||
esCfg.put("es.nodes.wan.only", "true");
|
esCfg.put("es.nodes.wan.only", esNodesWanOnly);
|
||||||
|
|
||||||
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
JavaEsSpark.saveJsonToEs(inputRdd, index, esCfg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,26 @@
|
||||||
<name>esIndexHost</name>
|
<name>esIndexHost</name>
|
||||||
<description>the elasticsearch host</description>
|
<description>the elasticsearch host</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchWriteRetryCount</name>
|
||||||
|
<value>8</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchWriteRetryWait</name>
|
||||||
|
<value>60s</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchSizeEntries</name>
|
||||||
|
<value>200</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esNodesWanOnly</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>maxIndexedEventsForDsAndTopic</name>
|
<name>maxIndexedEventsForDsAndTopic</name>
|
||||||
<description>the max number of events for each couple (ds/topic)</description>
|
<description>the max number of events for each couple (ds/topic)</description>
|
||||||
|
@ -478,6 +498,10 @@
|
||||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||||
<arg>--index</arg><arg>${esEventIndexName}</arg>
|
<arg>--index</arg><arg>${esEventIndexName}</arg>
|
||||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
|
||||||
|
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
|
||||||
|
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
|
||||||
|
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
|
||||||
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
|
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
|
||||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
@ -505,6 +529,10 @@
|
||||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||||
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
||||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
|
||||||
|
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
|
||||||
|
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
|
||||||
|
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
|
||||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="stats"/>
|
<ok to="stats"/>
|
||||||
|
|
|
@ -16,5 +16,29 @@
|
||||||
"paramLongName": "esHost",
|
"paramLongName": "esHost",
|
||||||
"paramDescription": "the ES host",
|
"paramDescription": "the ES host",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchWriteRetryCount",
|
||||||
|
"paramLongName": "esBatchWriteRetryCount",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchWriteRetryWait",
|
||||||
|
"paramLongName": "esBatchWriteRetryWait",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchSizeEntries",
|
||||||
|
"paramLongName": "esBatchSizeEntries",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esNodesWanOnly",
|
||||||
|
"paramLongName": "esNodesWanOnly",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -16,7 +16,31 @@
|
||||||
"paramLongName": "esHost",
|
"paramLongName": "esHost",
|
||||||
"paramDescription": "the ES host",
|
"paramDescription": "the ES host",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchWriteRetryCount",
|
||||||
|
"paramLongName": "esBatchWriteRetryCount",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchWriteRetryWait",
|
||||||
|
"paramLongName": "esBatchWriteRetryWait",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchSizeEntries",
|
||||||
|
"paramLongName": "esBatchSizeEntries",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esNodesWanOnly",
|
||||||
|
"paramLongName": "esNodesWanOnly",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"paramName": "n",
|
"paramName": "n",
|
||||||
"paramLongName": "maxEventsForTopic",
|
"paramLongName": "maxEventsForTopic",
|
||||||
|
|
|
@ -17,6 +17,30 @@
|
||||||
"paramDescription": "the ES host",
|
"paramDescription": "the ES host",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchWriteRetryCount",
|
||||||
|
"paramLongName": "esBatchWriteRetryCount",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchWriteRetryWait",
|
||||||
|
"paramLongName": "esBatchWriteRetryWait",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esBatchSizeEntries",
|
||||||
|
"paramLongName": "esBatchSizeEntries",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "esNodesWanOnly",
|
||||||
|
"paramLongName": "esNodesWanOnly",
|
||||||
|
"paramDescription": "an ES configuration property",
|
||||||
|
"paramRequired": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"paramName": "broker",
|
"paramName": "broker",
|
||||||
"paramLongName": "brokerApiBaseUrl",
|
"paramLongName": "brokerApiBaseUrl",
|
||||||
|
|
|
@ -36,6 +36,26 @@
|
||||||
<name>esIndexHost</name>
|
<name>esIndexHost</name>
|
||||||
<description>the elasticsearch host</description>
|
<description>the elasticsearch host</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchWriteRetryCount</name>
|
||||||
|
<value>8</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchWriteRetryWait</name>
|
||||||
|
<value>60s</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchSizeEntries</name>
|
||||||
|
<value>200</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esNodesWanOnly</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>maxIndexedEventsForDsAndTopic</name>
|
<name>maxIndexedEventsForDsAndTopic</name>
|
||||||
<description>the max number of events for each couple (ds/topic)</description>
|
<description>the max number of events for each couple (ds/topic)</description>
|
||||||
|
@ -125,6 +145,10 @@
|
||||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||||
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
<arg>--index</arg><arg>${esNotificationsIndexName}</arg>
|
||||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
|
||||||
|
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
|
||||||
|
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
|
||||||
|
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
|
||||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
|
|
|
@ -13,6 +13,26 @@
|
||||||
<name>esIndexHost</name>
|
<name>esIndexHost</name>
|
||||||
<description>the elasticsearch host</description>
|
<description>the elasticsearch host</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchWriteRetryCount</name>
|
||||||
|
<value>8</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchWriteRetryWait</name>
|
||||||
|
<value>60s</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esBatchSizeEntries</name>
|
||||||
|
<value>200</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>esNodesWanOnly</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>an ES configuration property</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>maxIndexedEventsForDsAndTopic</name>
|
<name>maxIndexedEventsForDsAndTopic</name>
|
||||||
<description>the max number of events for each couple (ds/topic)</description>
|
<description>the max number of events for each couple (ds/topic)</description>
|
||||||
|
@ -102,6 +122,10 @@
|
||||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||||
<arg>--index</arg><arg>${esEventIndexName}</arg>
|
<arg>--index</arg><arg>${esEventIndexName}</arg>
|
||||||
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
<arg>--esHost</arg><arg>${esIndexHost}</arg>
|
||||||
|
<arg>--esBatchWriteRetryCount</arg><arg>${esBatchWriteRetryCount}</arg>
|
||||||
|
<arg>--esBatchWriteRetryWait</arg><arg>${esBatchWriteRetryWait}</arg>
|
||||||
|
<arg>--esBatchSizeEntries</arg><arg>${esBatchSizeEntries}</arg>
|
||||||
|
<arg>--esNodesWanOnly</arg><arg>${esNodesWanOnly}</arg>
|
||||||
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
|
<arg>--maxEventsForTopic</arg><arg>${maxIndexedEventsForDsAndTopic}</arg>
|
||||||
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
<arg>--brokerApiBaseUrl</arg><arg>${brokerApiBaseUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
|
Loading…
Reference in New Issue