Colin Kincaid Williams
2014-Sep-05 04:27 UTC
[Puppet Users] roles? parameters? facts? with somebody's hacked puppet modules...
I have a hacked-together class that somebody else wrote, based on some Wikimedia puppet scripts. I've been asked to set JAVA_HEAP_MAX if the host is a ResourceManager, so I'm trying to set the variable yarn_env_java_heap_max when the host is an RM. Is it possible to set the parameter as below? Do I have to create a separate role just to set this parameter? Or am I better off checking whether the host is an RM in the yarn-site.sh.erb template itself?

if ($::hostname in $rm_hosts) {
  yarn_env_java_heap_max => '-Xmx4000M',
}

I am accessing the variable in the yarn-site.sh.erb template as follows (with the @ prefix on both references, and no spaces around the =, since it is a shell file):

<% if @yarn_env_java_heap_max -%>
JAVA_HEAP_MAX=<%= @yarn_env_java_heap_max %>
<% end -%>
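One approach I'm considering is to compute the value before declaring the class and pass it in as an ordinary parameter. A rough sketch of what I mean (untested; $yarn_heap and the '-Xmx1000M' fallback are placeholder names/values of mine, and note I compare $::fqdn rather than $::hostname, since rm_hosts holds fully qualified names):

# Untested sketch: decide the heap value first, then pass it in.
$rm_hosts = [
  'us3sm2nn010r07.comp.prod.local',
  'us3sm2nn011r08.comp.prod.local',
]

if $::fqdn in $rm_hosts {
  $yarn_heap = '-Xmx4000M'
} else {
  $yarn_heap = '-Xmx1000M'  # hypothetical fallback; could instead mirror the class default
}

class { 'cdh5::hadoop':
  rm_hosts               => $rm_hosts,
  yarn_env_java_heap_max => $yarn_heap,
  # ...all the other parameters unchanged...
}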
For context, here is the full role class, role/wh/hadoop.pp:

# role/wh/hadoop.pp
# borrowed from https://git.wikimedia.org/tree/operations%2Fpuppet.git
# Role classes for Hadoop nodes.
#
# Usage:
#
# To install only hadoop client packages and configs:
#   include role::wh::hadoop
#
# To install a Hadoop Master (NameNode + ResourceManager, etc.):
#   include role::wh::hadoop::master
#
# To install a Hadoop Worker (DataNode + NodeManager + etc.):
#   include role::wh::hadoop::worker
#
class role::wh::hadoop::client {
  # include common labs or production hadoop configs
  case $common::data::env {
    'dev': {
      include role::wh::hadoop::dev
      anchor { 'cdh5_hadoop_dev_first': } ->
      Class['role::wh::hadoop::dev'] ->
      anchor { 'cdh5_hadoop_dev_last': }
    }
    'qa': {
      include role::wh::hadoop::qa
      anchor { 'cdh5_hadoop_qa_first': } ->
      Class['role::wh::hadoop::qa'] ->
      anchor { 'cdh5_hadoop_qa_last': }
    }
    'prod': {
      include role::wh::hadoop::production
      anchor { 'cdh5_hadoop_production_first': } ->
      Class['role::wh::hadoop::production'] ->
      anchor { 'cdh5_hadoop_production_last': }
    }
    default: {
      fail('Unrecognized environment type for hadoop')
    }
  }
}

class role::wh::hadoop::journalnode inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Journal Node': }
  class { 'cdh5::hadoop::journalnode': }
  anchor { 'cdh5_hadoop_journalnode_first': } ->
  Class['cdh5::hadoop::journalnode'] ->
  anchor { 'cdh5_hadoop_journalnode_last': }
}

class role::wh::hadoop::master inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Master (NameNode, ResourceManager & HistoryServer)': }
  system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
  system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }
  class { 'cdh5::hadoop::master': }
  anchor { 'cdh5_hadoop_master_first': } ->
  Class['cdh5::hadoop::master'] ->
  anchor { 'cdh5_hadoop_master_last': }
}

class role::wh::hadoop::worker inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Worker (DataNode & NodeManager)': }
  class { 'cdh5::hadoop::worker': }
  anchor { 'cdh5_hadoop_worker_first': } ->
  Class['cdh5::hadoop::worker'] ->
  anchor { 'cdh5_hadoop_worker_last': }
}

class role::wh::hadoop::standby inherits role::wh::hadoop::client {
  motd::register { 'Hadoop Standby NameNode': }
  system::mkdirs { '/var/lib/hadoop-hdfs/cache/hdfs/dfs/name': }
  system::mkdirs { '/var/lib/hadoop-hdfs/journalEdits': }
  class { 'cdh5::hadoop::namenode': }
  class { 'cdh5::hadoop::resourcemanager': }
  anchor { 'cdh5_wh_hadoop_standby_first': } ->
  Class['cdh5::hadoop::namenode'] ->
  Class['cdh5::hadoop::resourcemanager'] ->
  anchor { 'cdh5_wh_hadoop_standby_last': }
}

class role::wh::hadoop::primary::postinstall inherits role::wh::hadoop::client {
  class { 'cdh5::hadoop::namenode::primarypostinstall': }
  anchor { 'cdh5_wh_hadoop_primarypostinstall_first': } ->
  Class['cdh5::hadoop::namenode::primarypostinstall'] ->
  anchor { 'cdh5_wh_hadoop_primarypostinstall_last': }
}

class role::wh::hadoop::standby::postinstall inherits role::wh::hadoop::client {
  class { 'cdh5::hadoop::namenode::standbypostinstall': }
  anchor { 'cdh5_wh_hadoop_standbypostinstall_first': } ->
  Class['cdh5::hadoop::namenode::standbypostinstall'] ->
  anchor { 'cdh5_wh_hadoop_standbypostinstall_last': }
}

### The following classes should not be included directly.
### You should either include role::wh::hadoop::client,
### or role::wh::hadoop::worker or
### role::wh::hadoop::master.

class role::wh::hadoop::production {
  class { 'cdh5::hadoop':
    namenode_hosts => [
      'us3sm2nn010r07.comp.prod.local',
      'us3sm2nn011r08.comp.prod.local',
    ],
    rm_hosts => [
      'us3sm2nn010r07.comp.prod.local',
      'us3sm2nn011r08.comp.prod.local',
    ],
    dfs_name_dir => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name', '/nfs/namedir']],
    config_directory => '/etc/hadoop/conf',
    nameservice_id => 'whprod', # This is the logical name of the Hadoop cluster.
    journalnode_hosts => [
      'us3sm2zk010r07.comp.prod.local',
      'us3sm2zk011r08.comp.prod.local',
      'us3sm2zk012r09.comp.prod.local',
    ],
    dfs_journalnode_edits_dir => '/var/lib/hadoop-hdfs/journalEdits',
    datanode_mounts => [
      '/data1', '/data2', '/data3', '/data4', '/data5', '/data6',
      '/data7', '/data8', '/data9', '/data10', '/data11',
    ],
    dfs_data_path => 'dfs',
    dfs_block_size => 268435456, # 256 MB
    # Turn on Snappy compression by default for maps and final outputs
    mapreduce_intermediate_compression => true,
    mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression => true,
    mapreduce_output_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression_type => 'BLOCK',
    #mapreduce_map_tasks_maximum => ($::processorcount - 2) / 2,
    #mapreduce_reduce_tasks_maximum => ($::processorcount - 2) / 2,
    #mapreduce_job_reuse_jvm_num_tasks => 1,
    #mapreduce_map_memory_mb => 1536,
    #mapreduce_reduce_memory_mb => 3072,
    #mapreduce_map_java_opts => '-Xmx1024M',
    #mapreduce_reduce_java_opts => '-Xmx2560M',
    #mapreduce_reduce_shuffle_parallelcopies => 10,
    #mapreduce_task_io_sort_mb => 200,
    #mapreduce_task_io_sort_factor => 10,
    # This is the conditional I am asking about:
    if ($::hostname in $rm_hosts) {
      yarn_env_java_heap_max => '-Xmx4000M',
    }
    yarn_nodemanager_resource_memory_mb => 40960,
    yarn_resourcemanager_scheduler_class => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
    net_topology_script_template => 'hadoop/getRackID.py-prod',
  }
  anchor { 'cdh5_hadoop_first': } ->
  Class['cdh5::hadoop'] ->
  anchor { 'cdh5_hadoop_last': }

  file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
    content => template('hadoop/capacity-scheduler.xml-prod'),
    require => Class['cdh5::hadoop'],
  }
}
class role::wh::hadoop::qa {
  class { 'cdh5::hadoop':
    namenode_hosts => [
      'us3sm2hbqa03r09.comp.prod.local',
      'us3sm2hbqa04r07.comp.prod.local',
    ],
    rm_hosts => [
      'us3sm2hbqa03r09.comp.prod.local',
      'us3sm2hbqa04r07.comp.prod.local',
    ],
    dfs_name_dir => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
    config_directory => '/etc/hadoop/conf',
    nameservice_id => 'whqa', # This is the logical name of the Hadoop cluster.
    journalnode_hosts => [
      'us3sm2hbqa03r09.comp.prod.local',
      'us3sm2hbqa04r07.comp.prod.local',
      'us3sm2hbqa05r08.comp.prod.local',
    ],
    dfs_journalnode_edits_dir => '/var/lib/hadoop-hdfs/journalEdits',
    datanode_mounts => [ '/data1', '/data2' ],
    dfs_data_path => 'dfs',
    dfs_block_size => 268435456, # 256 MB
    # Turn on Snappy compression by default for maps and final outputs
    mapreduce_intermediate_compression => true,
    mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression => true,
    mapreduce_output_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression_type => 'BLOCK',
    yarn_nodemanager_resource_memory_mb => 24576,
    yarn_resourcemanager_max_completed_applications => 500,
    yarn_resourcemanager_scheduler_class => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
    net_topology_script_template => 'hadoop/getRackID.py-qa',
  }
  anchor { 'cdh5_wh_hadoop_qa_first': } ->
  Class['cdh5::hadoop'] ->
  anchor { 'cdh5_wh_hadoop_qa_last': }

  file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
    content => template('hadoop/capacity-scheduler.xml-qa'),
    require => Class['cdh5::hadoop'],
  }
}

class role::wh::hadoop::dev {
  class { 'cdh5::hadoop':
    namenode_hosts => [$::fqdn],
    rm_hosts => [$::fqdn],
    dfs_name_dir => [['/var/lib/hadoop-hdfs/cache/hdfs/dfs/name']],
    config_directory => '/etc/hadoop/conf',
    # nameservice_id => 'whdev',
    journalnode_hosts => [$::fqdn],
    dfs_journalnode_edits_dir => '/var/lib/hadoop-hdfs/journalEdits',
    datanode_mounts => [ '/data1', '/data2' ],
    dfs_data_path => 'dfs',
    dfs_block_size => 67108864, # 64 MB
    # Turn on Snappy compression by default for maps and final outputs
    mapreduce_intermediate_compression => true,
    mapreduce_intermediate_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression => true,
    mapreduce_output_compression_codec => 'org.apache.hadoop.io.compress.SnappyCodec',
    mapreduce_output_compression_type => 'BLOCK',
    mapreduce_map_tasks_maximum => 2,
    mapreduce_reduce_tasks_maximum => 2,
    yarn_nodemanager_resource_memory_mb => 4096,
    yarn_resourcemanager_scheduler_class => 'org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler',
  }
  anchor { 'cdh5_hadoop_first': } ->
  Class['cdh5::hadoop'] ->
  anchor { 'cdh5_hadoop_last': }

  file { "${::cdh5::hadoop::config_directory}/capacity-scheduler.xml":
    content => template('hadoop/capacity-scheduler.xml-qa'),
    require => Class['cdh5::hadoop'],
  }
}

Note that the main hadoop class (cdh5::hadoop) has a yarn_env_java_heap_max parameter that I added, and it is also given a default value.
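For reference, the shape of that parameter is something like the following (the default shown here is just a placeholder, not the actual value, and the other parameters are elided):

# Sketch of the relevant part of cdh5::hadoop (placeholder default):
class cdh5::hadoop (
  $namenode_hosts,
  $rm_hosts,
  # ...many other parameters...
  $yarn_env_java_heap_max = '-Xmx1000M',  # placeholder, not the real default
) {
  # ...
}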
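And if the better answer is to do the check in the template itself, I imagine it would look something like this (untested; it assumes the template is evaluated in cdh5::hadoop's scope so @rm_hosts is visible, and it pulls the fqdn fact via scope.lookupvar):

<%# Untested sketch: emit JAVA_HEAP_MAX only on ResourceManager hosts -%>
<% if @rm_hosts && @rm_hosts.include?(scope.lookupvar('::fqdn')) -%>
JAVA_HEAP_MAX=-Xmx4000M
<% end -%>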