---
# Generic machines data
time_zone: 'Europe/Rome'
cpu_cores: 8
datanode_ram: 11000
nagios_enabled: True
ganglia_enabled: False
ganglia_gmond_hdfs_datanodes_port: "8640:8660"
ganglia_gmond_jobtracker_port: "8640:8660"
ganglia_gmond_hbmaster_port: "8640:8660"
ganglia_gmond_namenode_port: "8640:8660"
configure_munin: True
# JDK (Oracle)
jdk_version:
  - 7
  - 8
jdk_default: 8
java_home: '/usr/lib/jvm/java-{{ jdk_default }}-oracle'
jdk_java_home: '{{ java_home }}'
# PKG state: latest or present. Set to 'latest' when you want to upgrade the installed packages.
hadoop_pkg_state: present
#
#
# Global data
#
worker_nodes_num: 4
worker_node_start: 2
worker_node_end: 5
worker_node_swappiness: 0
dns_domain: t.hadoop.research-infrastructures.eu
namenode_hostname: 'nn1.{{ dns_domain }}'
secondary_nm_hostname: 'nn2.{{ dns_domain }}'
quorum_0_node_hostname: 'quorum0.{{ dns_domain }}'
quorum_1_node_hostname: 'quorum1.{{ dns_domain }}'
quorum_2_node_hostname: 'quorum2.{{ dns_domain }}'
quorum_3_node_hostname: 'quorum3.{{ dns_domain }}'
quorum_4_node_hostname: 'quorum4.{{ dns_domain }}'
hbase_master_1_hostname: 'hbase-master1.{{ dns_domain }}'
hbase_master_2_hostname: 'hbase-master2.{{ dns_domain }}'
ldap:
  server: ldap://ldap.sub.research-infrastructures.eu
  search_bind_auth: False
  username_pattern: "uid=,ou=People,o=Users,ou=Organizations,dc=research-infrastructures,dc=eu"
hadoop_ldap_uri: ldap://ldap.sub.research-infrastructures.eu
hadoop_ldap_base_dn: "dc=research-infrastructures,dc=eu"
hadoop_ldap_search_bind_auth: False
hadoop_ldap_username_pattern: "uid=,ou=People,o=Users,ou=Organizations,dc=research-infrastructures,dc=eu"
#
# LOGGING
#
# WARN,INFO,DEBUG,ERROR
hadoop_log_level: INFO
#
# RFA is the rolling file appender
hadoop_log_appender: RFA
hadoop_log_appender_max_filesize: 256MB
# The max backup index is ignored if the appender is the daily rolling file appender
hadoop_log_appender_max_backupindex: 10
#
# We can use a logstash collector
hadoop_send_to_logstash: False
# Ditch the local appender if you want a logstash-only solution
hadoop_logstash_appender: RFA,LOGSTASH
hadoop_logstash_collector_host: 'logstash.{{ dns_domain }}'
hadoop_logstash_collector_socketappender_port: 4560
hadoop_logstash_collector_socketappender_reconndelay: 10000
#
# rsyslog
rsyslog_install_newer_package: True
rsyslog_send_to_elasticsearch: False
rsyslog_use_queues: False
rsyslog_use_elasticsearch_module: False
rsys_elasticsearch_collector_host: '{{ hadoop_logstash_collector_host }}'
rsys_elasticsearch_collector_port: 9200
#
# General hadoop
#
initialize_hadoop_cluster: False
hadoop_cluster_name: "nmis-hadoop-cluster"
hadoop_data_dir: /data
hadoop_conf_dir: '/etc/hadoop/conf.{{ hadoop_cluster_name|lower }}'
hadoop_mapred_home: /usr/lib/hadoop-0.20-mapreduce
hadoop_hdfs_data_disk:
  - { mountpoint: '/data', device: 'xvda3', fstype: 'xfs' }
#
# Hadoop default heapsize
# The default is 1000
hadoop_default_heapsize: 1024
hadoop_default_java_opts: "-server -Djava.awt.headless=true -Djava.net.preferIPv4Stack=true -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF-8"
hadoop_jmx_enabled: False
#
# HDFS
#
hdfs_cluster_id: '{{ hadoop_cluster_name }}'
hdfs_cluster_nn_id_1: nn1
hdfs_cluster_nn_id_2: nn2
hdfs_cluster_ids: "{{ hdfs_cluster_nn_id_1 }},{{ hdfs_cluster_nn_id_2 }}"
hdfs_namenode_1_hostname: '{{ namenode_hostname }}'
hdfs_namenode_2_hostname: '{{ secondary_nm_hostname }}'
hdfs_data_dir: '{{ hadoop_data_dir }}/dfs'
hdfs_nn_data_dir: nn
hdfs_dn_data_dir: dn
hdfs_dn_balance_bandwidthPerSec: 2097152
hdfs_support_append: "true"
hdfs_nn_rpc_port: 8020
hdfs_nn_http_port: 50070
hdfs_nn_client_port: 57045
# Handler count. Recommended: ln(number of datanodes) * 20
hdfs_nn_handler_count: 50
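# Illustrative note (not a setting): the MapReduce comments further below mention
# 12 nodes; with that figure the recommendation above gives ln(12) * 20 ≈ 2.5 * 20 ≈ 50,
# which is the handler count configured here.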
# Recommended: up to 128MB, 134217728 bytes (this is the default; it is a client parameter)
hdfs_block_size: 16777216
hdfs_repl_max: 256
hdfs_replication: 1
# Set to 0 to disable the trash. Note that the client can enable it.
hdfs_fs_trash_interval: 10060
hdfs_datanode_max_xcievers: 1024
hdfs_datanode_http_port: 50075
hdfs_datanode_ipc_port: 50020
hdfs_datanode_rpc_port: 50010
hdfs_dfs_socket_timeout: 600000
hdfs_dfs_socket_write_timeout: 600000
# See http://www.cloudera.com/content/cloudera/en/documentation/cdh4/latest/CDH4-Installation-Guide/cdh4ig_topic_11_6.html
hdfs_read_shortcircuit: True
hdfs_read_shortcircuit_cache_size: 3000
hdfs_read_shortcircuit_cache_expiry: 50000
hdfs_read_shortcircuit_cache_dir: '/var/run/hadoop-hdfs'
hdfs_journal_id: '{{ hdfs_cluster_id }}'
hdfs_journal_port: 8485
hdfs_journal_0: '{{ quorum_0_node_hostname }}'
hdfs_journal_1: '{{ quorum_1_node_hostname }}'
hdfs_journal_2: '{{ quorum_2_node_hostname }}'
hdfs_journal_3: '{{ quorum_3_node_hostname }}'
hdfs_journal_4: '{{ quorum_4_node_hostname }}'
hdfs_journal_data_dir: jn
hdfs_journal_http_port: 8480
hdfs_zkfc_port: 8019
hdfs_webhdfs_enabled: True
hdfs_users_supergroup: supergroup
# The following is used to retrieve the ssh key needed for the HA failover
hdfs_user_home: /usr/lib/hadoop
httpfs_user: httpfs
httpfs_host: 'hue.{{ dns_domain }}'
httpfs_host_1: 'nn1.{{ dns_domain }}'
httpfs_host_2: 'nn2.{{ dns_domain }}'
httpfs_port: 14000
httpfs_catalina_work_dir: /usr/lib/hadoop-httpfs/work
#
# Zookeeper
zookeeper_conf_dir: '/etc/zookeeper/conf.{{ hadoop_cluster_name|lower }}'
zookeeper_log_dir: '/var/log/zookeeper'
zookeeper_client_port: 2182
zookeeper_quorum_port: 4182
zookeeper_leader_port: 3182
zookeeper_min_timeout: 30000
zookeeper_max_timeout: 240000
zookeeper_quorum_0: '{{ quorum_0_node_hostname }}'
zookeeper_quorum_1: '{{ quorum_1_node_hostname }}'
zookeeper_quorum_2: '{{ quorum_2_node_hostname }}'
zookeeper_quorum_3: '{{ quorum_3_node_hostname }}'
zookeeper_quorum_4: '{{ quorum_4_node_hostname }}'
zookeeper_maxclient_connections: 240
zookeeper_nodes: "{{ zookeeper_quorum_0 }},{{ zookeeper_quorum_1 }},{{ zookeeper_quorum_2 }},{{ zookeeper_quorum_3 }},{{ zookeeper_quorum_4 }}"
zookeeper_cluster: "{{ zookeeper_quorum_0 }}:{{ zookeeper_client_port }},{{ zookeeper_quorum_1 }}:{{ zookeeper_client_port }},{{ zookeeper_quorum_2 }}:{{ zookeeper_client_port }},{{ zookeeper_quorum_3 }}:{{ zookeeper_client_port }},{{ zookeeper_quorum_4 }}:{{ zookeeper_client_port }}"
#
# Jobtracker
#
jobtracker_cluster_id: nmis-hadoop-jt
jobtracker_node_1_hostname: 'jobtracker.{{ dns_domain }}'
jobtracker_node_2_hostname: 'jobtracker2.{{ dns_domain }}'
jobtracker_cluster_id_1: jt1
jobtracker_cluster_id_2: jt2
jobtracker_cluster_id1_rpc_port: 8021
jobtracker_cluster_id2_rpc_port: 8022
jobtracker_cluster_id1_ha_rpc_port: 8023
jobtracker_cluster_id2_ha_rpc_port: 8024
jobtracker_cluster_id1_http_port: 50030
jobtracker_cluster_id2_http_port: 50031
jobtracker_http_port: 9290
jobtracker_persistent_jobstatus: 'true'
jobtracker_restart_recover: 'false'
jobtracker_failover_connect_retries: 3
jobtracker_auto_failover_enabled: 'true'
jobtracker_zkfc_port: 8018
# Handler count. Recommended: ln(number of datanodes) * 20
jobtracker_handler_count: 50
# We have 12 nodes and 6 CPUs per node
# Reduce tasks formula: 0.95 or 1.75 * (nodes * mapred.tasktracker.tasks.maximum)
# Cloudera defaults: 2 mappers, 2 reducers max
# ------
# Tested: too much stress on the hardware
#mapred_tasktracker_map_tasks_maximum: 6
#mapred_tasktracker_reduce_tasks_maximum: 68
#mapred_reduce_child_java_opts: "-Xmx2G"
# ------
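# Illustrative arithmetic for the reduce-tasks formula above (not a setting):
# with 12 nodes and the 4 reduce slots per node configured below,
# 0.95 * (12 * 4) ≈ 46 and 1.75 * (12 * 4) = 84 reduce tasks per job.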
mapred_tasktracker_http_port: 50060
mapred_tasktracker_map_tasks_maximum: 2
mapred_tasktracker_reduce_tasks_maximum: 4
mapred_use_fair_scheduler: True
mapred_fair_scheduler_pools:
  - { name: 'solr', map: '12', reduce: '18' }
mapred_fair_scheduler_use_poolnameproperty: True
mapred_fair_scheduler_poolnameproperty: user.name
mapred_fair_scheduler_undecl_pools: True
mapred_fair_scheduler_preemption: False
mapred_fair_scheduler_assignmultiple: True
mapred_fair_scheduler_allocation_file: '{{ hadoop_conf_dir }}/fair-scheduler.xml'
# Reducer parallel copies. Recommended: ln(number of datanodes) * 4,
# with a minimum of 10
mapred_reduce_parallel_copies: 10
# Recommended: 80
mapred_tasktracker_http_threads: 80
# Default: 0.05. Recommended: 0.8. Used by the jobtracker
mapred_reduce_slowstart_maps: 0.9
# Default: 100. We could increase it
mapred_tasktracker_io_sort_mb: 256
mapred_io_sort_factor: 25
mapreduce_job_counters_max: 5000
mapred_userlog_retain_hours: 24
mapred_jt_completeuserjobs_max: 150
mapred_jt_persist_jobstatus_hours: 4320
mapred_user_jobconf_limit: 5242880
mapred_jt_retirejob_interval: 86400000
mapreduce_jt_split_metainfo_maxsize: 10000000
mapred_queue_names: default
#
mapred_staging_root_dir: /user
mapred_old_staging_root_dir: /home
mapred_local_dir: /data/mapred/local
# Java parameters
mapred_child_java_opts: "-Xmx3092M"
mapred_map_child_java_opts: "-Xmx3092M"
#mapred_reduce_child_java_opts: "-Xmx1512M"
mapred_reduce_child_java_opts: "-Xmx2048M"
#
# HBASE
#
# Raw formula to calculate the needed regionserver heap size:
#   regions.hbase.hregion.max.filesize /
#   hbase.hregion.memstore.flush.size *
#   dfs.replication *
#   hbase.regionserver.global.memstore.lowerLimit
# See: http://hadoop-hbase.blogspot.it/2013/01/hbase-region-server-memory-sizing.html
#
hbase_user: hbase
hbase_conf_dir: '/etc/hbase/conf.{{ hadoop_cluster_name|lower }}'
# HBASE heap size
hbase_master_heap_size: 5120
hbase_thrift_heap_size: 1024
hbase_regionserver_heap_size: 4500
hbase_master_java_opts: '-Xmx{{ hbase_master_heap_size }}M'
hbase_regionserver_maxdirectmemory_size: "-XX:MaxDirectMemorySize=2G"
hbase_regionserver_java_opts: '-Xmx{{ hbase_regionserver_heap_size }}M'
hbase_thrift_java_opts: '-Xmx{{ hbase_thrift_heap_size }}M'
hbase_zookeeper_java_opts: -Xmx1G
hbase_thrift_port: 9090
hbase_thrift_jmx_port: 9591
# hbase zookeeper timeout
hbase_zookeeper_timeout: '{{ zookeeper_max_timeout }}'
# The RPC timeout needs to be greater than the lease period
# See http://hbase.apache.org/book/trouble.client.html
hbase_rpc_timeout: 600000
hbase_lease_period: 400000
hbase_open_files: 65536
hbase_master_rpc_port: 60000
hbase_master_http_port: 60010
hbase_regionserver_http_port: 60030
hbase_regionserver_http_1_port: 60020
# This is controversial. When set to 'true' HBase balances
# each table separately, without paying attention to the global balancing
hbase_loadbalance_bytable: True
# Default is 0.2
hbase_regions_slop: 0.15
# Default is 10. The recommendation is to keep it low when the payload per request grows.
# We have mixed payloads.
hbase_handler_count: 12
# The default was 256M; it's 10737418240 (10GB) since 0.94.
# The recommendation is to have it big to decrease the total number of regions.
# 1288490188 is circa 1.2GB
hbase_hregion_max_file_size: 1288490188
hbase_hregion_memstore_mslab_enabled: True
# The default is 134217728 (128MB). We set it to 256MB.
hbase_hregion_memstore_flush_size: 268435456
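# Informational check of the two sizes above: 1.2 * 1024^3 ≈ 1288490189 bytes,
# and 268435456 bytes = 256 * 1024^2, i.e. exactly 256MB.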
# The default is 0.4
hbase_regionserver_global_memstore_lowerLimit: 0.35
# hbase_regionserver_global_memstore_upperLimit: 0.45
hbase_hregion_memstore_block_multiplier: 3
# HBASE thrift server
hbase_thrift_server_1: '{{ hbase_master_1_hostname }}'
hbase_thrift_server_2: '{{ hbase_master_2_hostname }}'
#
# nginx is used as a reverse proxy to all the web interfaces
#
nginx_use_ldap_pam_auth: True
nginx_pam_svc_name: nginx
nginx_ldap_uri: '{{ hadoop_ldap_uri }}'
nginx_ldap_base_dn: '{{ hadoop_ldap_base_dn }}'
portal_nginx_conf: management-portal
portal_pam_svc_name: '{{ nginx_pam_svc_name }}'
portal_title: "NeMIS Hadoop Cluster"
portal_web_root: /usr/share/nginx/www
#
# OOZIE and HIVE DB data
#
oozie_db_type: postgresql
oozie_db_name: oozie
oozie_db_user: oozie
oozie_db_host: db.t.hadoop.research-infrastructures.eu
hive_db_type: '{{ oozie_db_type }}'
hive_db_name: hive
hive_db_user: hive
hive_db_host: '{{ oozie_db_host }}'
hive_metastore_db_type: '{{ oozie_db_type }}'
hive_metastore_db_name: metastore
hive_metastore_db_user: metastore
hive_metastore_db_host: '{{ oozie_db_host }}'
hue_db_type: '{{ oozie_db_type }}'
hue_db_name: hue
hue_db_user: hue
hue_db_host: '{{ oozie_db_host }}'
hue_http_port: 8888
oozie_ip: 146.48.123.66
hive_ip: '{{ oozie_ip }}'
hue_ip: '{{ oozie_ip }}'
# Iptables
other_networks:
  # Marek
  icm_pl: 213.135.59.0/24
  # eri.katsari
  icm_pl_1: 195.134.66.216/32
  # Antonis' addresses; they need to reach HDFS and ZooKeeper (ARC). Also Glykeria Katsari.
  ilsp_gr: [ '194.177.192.226/32', '194.177.192.223/32', '195.134.66.96/32', '194.177.192.218/32', '194.177.192.231/32', '195.134.66.216/32', '195.134.66.145/32', '194.177.192.118/32', '195.134.66.244' ]
  # Needed by Marek. It's the IIS cluster gateway.
  iis_pl_1: 213.135.60.74/32
  # Jochen
  icm_1: 129.70.43.118/32
monitoring_group_name: hadoop-cluster
nagios_local_plugins_dir: /usr/lib/nagios/plugins/hadoop
nagios_common_lib: check_library.sh
nagios_monitoring_dir: '/etc/nagios3/objects/{{ monitoring_group_name }}'
nagios_root_disk: /
nagios_check_disk_w: 10%
nagios_check_disk_c: 7%
nagios_service_contacts:
  - andrea.dellamico
  - claudio.atzori
nagios_contactgroup: hadoop-managers
nagios_monitoring_server_ip: 146.48.123.23
iptables_default_policy: REJECT