Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit bae4ed7

Browse files
author
Min Zhou
committed
enhanced operations on yarn scheduler and mapreduce jobhistory server
1 parent df6ee45 commit bae4ed7

File tree

15 files changed

+161
-97
lines changed

15 files changed

+161
-97
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Emacs temp files
2+
\#*\#

group_vars/all

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,10 @@ yarn_nodemanager_vmem_pmem_ratio: 10
5454
yarn_nodemanager_resource_memory_mb: 24576
5555
yarn_nodemanager_pmem_check_enabled: 'true'
5656
yarn_nodemanager_vmem_check_enabled: 'true'
57+
yarn_nodemanager_cgroups_mount_path: /cgroup
5758

5859
# mapred-site_xml
59-
mapreduce_map_memory_mb: 4096
60-
mapreduce_reduce_memory_mb: 8192
61-
mapreduce_map_java_opts: '-Xmx3072m'
62-
mapreduce_reduce_java_opts: '-Xmx6144m'
63-
mapreduce_jobtracker_handler_count: 128
60+
mapreduce_map_memory_mb: 1024
61+
mapreduce_reduce_memory_mb: 1024
62+
mapreduce_map_java_opts: '-Xmx4096m'
63+
mapreduce_reduce_java_opts: '-Xmx4096m'

roles/cdh_hadoop_config/tasks/.#main.yml

Lines changed: 0 additions & 1 deletion
This file was deleted.

roles/cdh_hadoop_config/tasks/main.yml

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,13 @@
6161
template: src={{ item }} dest=/etc/hadoop/conf.{{ site_name|lower }}/{{ item }} owner=hadoop group=hadoop mode=0644
6262
with_items:
6363
- hadoop-env.sh
64-
notify:
65-
- restart hadoop-hdfs-namenode
66-
- restart hadoop-hdfs-journalnode
67-
- restart hadoop-hdfs-datanode
68-
- restart hadoop-mapreduce-historyserver
69-
- restart hadoop-yarn-nodemanager
70-
- restart hadoop-yarn-resourcemanager
64+
# notify:
65+
# - restart hadoop-hdfs-namenode
66+
# - restart hadoop-hdfs-journalnode
67+
# - restart hadoop-hdfs-datanode
68+
# - restart hadoop-mapreduce-historyserver
69+
# - restart hadoop-yarn-nodemanager
70+
# - restart hadoop-yarn-resourcemanager
7171
tags:
7272
- update-hadoop-env
7373
- update-hadoop-config
@@ -116,7 +116,7 @@
116116
- update-hadoop-yarn-config
117117

118118
- name: configure container executor in /etc/hadoop/conf.{{ site_name|lower }}
119-
template: src={{ item }} dest=/etc/hadoop/conf/{{ item }} owner=yarn group=yarn mode=0644
119+
template: src={{ item }} dest=/etc/hadoop/conf/{{ item }} owner=root group=root mode=0600
120120
with_items:
121121
- container-executor.cfg
122122
# notify:
@@ -125,3 +125,13 @@
125125
tags:
126126
- update-hadoop-yarn-executor-config
127127
- update-hadoop-yarn-config
128+
129+
130+
- name: configure mapreduce site
131+
template: src={{ item }} dest=/etc/hadoop/conf/{{ item }} owner=yarn group=yarn mode=0644
132+
with_items:
133+
- mapred-site.xml
134+
tags:
135+
- update-hadoop-mapreduce-site
136+
- update-hadoop-mapreduce
137+
- update-hadoop

roles/cdh_hadoop_config/templates/container-executor.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ banned.users=hadoop,hdfs,zookeeper
55
#Prevent other super-users
66
min.user.id=1000
77
##comma separated list of system users who CAN run applications
8-
allowed.system.users=
8+
allowed.system.users=yarn

roles/cdh_hadoop_config/templates/fair-scheduler.xml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,26 @@
44

55
<allocations>
66
<queue name="big-etl">
7-
<minResources>96000 mb,0vcores</minResources>
8-
<maxResources>200000 mb,0vcores</maxResources>
7+
<minResources>16000 mb, 16vcores</minResources>
8+
<maxResources>200000 mb,64vcores</maxResources>
99
<maxRunningApps>128</maxRunningApps>
10-
<maxAMShare>0.1</maxAMShare>
10+
<maxAMShare>0.1f</maxAMShare>
1111
<weight>2.0</weight>
1212
<schedulingPolicy>fair</schedulingPolicy>
1313
<aclSubmitApps> big-etl</aclSubmitApps>
1414
</queue>
1515

1616
<queue name="big-analyst">
17-
<minResources>128000 mb,0vcores</minResources>
18-
<maxResources>160000 mb,0vcores</maxResources>
17+
<minResources>16000 mb,16vcores</minResources>
18+
<maxResources>160000 mb,84vcores</maxResources>
1919
<maxRunningApps>64</maxRunningApps>
20-
<maxAMShare>0.1</maxAMShare>
20+
<maxAMShare>0.1f</maxAMShare>
2121
<weight>1.0</weight>
2222
<schedulingPolicy>fair</schedulingPolicy>
2323
<aclSubmitApps> big-analyst</aclSubmitApps>
2424
</queue>
2525

26-
<queueMaxAMShareDefault>0.5</queueMaxAMShareDefault>
26+
<queueMaxAMShareDefault>0.1f</queueMaxAMShareDefault>
2727
<userMaxAppsDefault>4</userMaxAppsDefault>
2828

2929
<queuePlacementPolicy>

roles/cdh_hadoop_config/templates/mapred-site.xml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,6 @@
6868
<name>mapreduce.job.ubertask.enable</name>
6969
<value>true</value>
7070
</property>
71-
<property>
72-
<name>mapreduce.local.dir</name>
73-
<value>{% for dir in mapreduce_local_dir %}file://{{ dir }}{% if not loop.last %},{% endif %}{% endfor %}</value>
74-
</property>
7571
<property>
7672
<name>mapreduce.job.maps</name>
7773
<value>1</value>

roles/cdh_hadoop_config/templates/yarn-site.xml

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
</property>
3434

3535
<!-- Resource Manager -->
36+
<property>
37+
<name>yarn.resourcemanager.scheduler.address</name>
38+
<value>{{ hostvars[groups['resourcemanager'][0]]['ansible_fqdn'] }}:8030</value>
39+
</property>
3640
<property>
3741
<name>yarn.resourcemanager.resource-tracker.address</name>
3842
<value>{{ hostvars[groups['resourcemanager'][0]]['ansible_fqdn'] }}:8031</value>
@@ -41,10 +45,6 @@
4145
<name>yarn.resourcemanager.address</name>
4246
<value>{{ hostvars[groups['resourcemanager'][0]]['ansible_fqdn'] }}:8032</value>
4347
</property>
44-
<property>
45-
<name>yarn.resourcemanager.scheduler.address</name>
46-
<value>{{ hostvars[groups['resourcemanager'][0]]['ansible_fqdn'] }}:8030</value>
47-
</property>
4848
<property>
4949
<name>yarn.resourcemanager.admin.address</name>
5050
<value>{{ hostvars[groups['resourcemanager'][0]]['ansible_fqdn'] }}:8033</value>
@@ -53,6 +53,11 @@
5353
<name>yarn.resourcemanager.webapp.address</name>
5454
<value>{{ hostvars[groups['resourcemanager'][0]]['ansible_fqdn'] }}:8088</value>
5555
</property>
56+
<property>
57+
<name>yarn.resourcemanager.recovery.enabled</name>
58+
<value>false</value>
59+
</property>
60+
5661

5762
<!-- Scheduler Configurations -->
5863
<property>
@@ -96,6 +101,32 @@
96101
<value>false</value>
97102
</property>
98103

104+
105+
<!-- NodeManager Configs -->
106+
<property>
107+
<name>yarn.nodemanager.localizer.address</name>
108+
<value>0.0.0.0:8040</value>
109+
</property>
110+
<property>
111+
<name>yarn.nodemanager.address</name>
112+
<value>0.0.0.0:8041</value>
113+
</property>
114+
<property>
115+
<name>yarn.nodemanager.webapp.address</name>
116+
<value>0.0.0.0:8042</value>
117+
</property>
118+
<property>
119+
<name>yarn.nodemanager.admin-env</name>
120+
<value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value>
121+
</property>
122+
<property>
123+
<name>yarn.nodemanager.env-whitelist</name>
124+
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME</value>
125+
</property>
126+
<property>
127+
<name>yarn.nodemanager.container-executor.class</name>
128+
<value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
129+
</property>
99130
<!-- Specify the Directories -->
100131
<property>
101132
<name>yarn.nodemanager.local-dirs</name>
@@ -105,6 +136,14 @@
105136
<name>yarn.nodemanager.log-dirs</name>
106137
<value>{% for dir in yarn_nodemanager_log_dirs %}file://{{ dir }}{% if not loop.last %},{% endif %}{% endfor %}</value>
107138
</property>
139+
<property>
140+
<name>yarn.nodemanager.aux-services</name>
141+
<value>mapreduce_shuffle</value>
142+
</property>
143+
<property>
144+
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
145+
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
146+
</property>
108147

109148
<!-- NodeManager Log Aggregation -->
110149
<property>
@@ -113,12 +152,8 @@
113152
</property>
114153
<property>
115154
<name>yarn.nodemanager.remote-app-log-dir</name>
116-
<value>hdfs://{{ site_name|lower }}/var/log/hadoop-yarn/apps</value>
155+
<value>hdfs://{{ site_name|lower }}/system/app-logs</value>
117156
</property>
118-
<!-- <property> -->
119-
<!-- <name>yarn.nodemanager.remote-app-log-dir</name> -->
120-
<!-- <value>/var/log/hadoop-yarn/apps</value> -->
121-
<!-- </property> -->
122157
<property>
123158
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
124159
<value>yarn_aggregated_logs</value>
@@ -143,11 +178,11 @@
143178
</property>
144179
<property>
145180
<name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
146-
<value>false</value>
181+
<value>true</value>
147182
</property>
148183
<property>
149184
<name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
150-
<value>/cgroup</value>
185+
<value>{{ yarn_nodemanager_cgroups_mount_path }}</value>
151186
</property>
152187
<property>
153188
<name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
@@ -173,16 +208,12 @@
173208
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
174209
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
175210
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
176-
$YARN_HOME/*,$YARN_HOME/lib/*
211+
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
177212
</value>
178213
</property>
179214
<property>
180-
<name>yarn.nodemanager.aux-services</name>
181-
<value>mapreduce_shuffle</value>
182-
</property>
183-
<property>
184-
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
185-
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
215+
<name>yarn.app.mapreduce.am.staging-dir</name>
216+
<value>/user</value>
186217
</property>
187218

188219
</configuration>

roles/cdh_hadoop_journalnode/tasks/main.yml

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,18 @@
66
- name: create the data directory for journal nodes edits
77
file: path={{ item }} state=directory owner=hdfs group=hdfs mode=0700
88
with_items:
9-
- "{{ dfs_journalnode_edits_dir }}"
9+
- "{{ dfs_journalnode_edits_dir }}"
1010
tags:
11-
- install-hadoop-hdfs-journalnode
12-
- install-hadoop-hdfs
13-
- install-hadoop
11+
- install-hadoop-hdfs-journalnode
12+
- install-hadoop-hdfs
13+
- install-hadoop
1414

1515
- name: install hadoop-hdfs-journalnode via apt
1616
apt: name=hadoop-hdfs-journalnode state=present update_cache=yes force=yes
1717
tags:
18-
- install-hadoop-hdfs-journalnode
19-
- install-hadoop-hdfs
20-
- install-hadoop
21-
22-
- name: remove hadoop-hdfs-journalnode via apt
23-
apt: name=hadoop-hdfs-journalnode state=absent purge=yes
24-
tags:
25-
- remove-hadoop-hdfs-journalnode
26-
- remove-hadoop-hdfs
27-
- remove-hadoop
18+
- install-hadoop-hdfs-journalnode
19+
- install-hadoop-hdfs
20+
- install-hadoop
2821

2922
# - name: Cleanup the dependencies of hadoop-hdfs-journalnode
3023
# shell: apt-get autoremove -y
@@ -34,8 +27,9 @@
3427
# - remove-hadoop
3528

3629
- name: start hadoop-hdfs-journalnode
37-
service: name=hadoop-hdfs-journalnode state=started
30+
service: name=hadoop-hdfs-journalnode state=restarted
3831
tags:
32+
- install-hadoop-hdfs-journalnode
3933
- start-hadoop-hdfs-journalnode
4034
- start-hadoop-hdfs
4135
- start-hadoop
@@ -47,6 +41,13 @@
4741
- stop-hadoop-hdfs
4842
- stop-hadoop
4943

44+
- name: remove hadoop-hdfs-journalnode via apt
45+
apt: name=hadoop-hdfs-journalnode state=absent purge=yes
46+
tags:
47+
- remove-hadoop-hdfs-journalnode
48+
- remove-hadoop-hdfs
49+
- remove-hadoop
50+
5051
- name: configure rsyslog for hadoop-hdfs-journalnode
5152
template: src=rsyslog.conf dest=/etc/rsyslog.d/60-hadoop-hdfs-journalnode.conf owner=root group=root mode=0644
5253
tags: rsyslog
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
---
2+
# file: roles/cdh_hadoop_mapreduce/tasks/main.yml
23

34
- name: install hadoop-mapreduce via apt
4-
apt: name=hadoop-mapreduce
5-
tags: hadoop
5+
apt: name=hadoop-mapreduce state=present force=yes
6+
tags:
7+
- install-hadoop-mapreduce
8+
9+
- name: remove hadoop-mapreduce packages via apt
10+
apt: name=hadoop-mapreduce state=absent force=yes
11+
tags:
12+
- remove-hadoop-mapreduce
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
---
22

33
- name: install hadoop-mapreduce-historyserver via apt
4-
apt: name=hadoop-mapreduce-historyserver
5-
tags: hadoop
4+
apt: name=hadoop-mapreduce-historyserver state=present force=yes
5+
tags:
6+
- install-hadoop-mapreduce-historyserver

roles/cdh_hadoop_user/tasks/main.yml

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,9 @@
1-
- name: Ensure mapred groups are present
2-
group: name={{ item }} state=present
3-
with_items:
4-
- hadoop
5-
- hdfs
6-
- yarn
7-
- mapred
8-
- httpfs
1+
- name: Ensure group are present
2+
group: name={{ group }} state=present
93
tags:
104
- create-hadoop-user
115

12-
- name: Add user for hadoop
13-
user: name=hadoop group=hadoop state=present
14-
tags:
15-
- create-hadoop-user
16-
17-
- name: Add user for hdfs
18-
user: name=hdfs groups=hadoop,hdfs state=present
19-
tags:
20-
- create-hadoop-user
21-
22-
- name: Add user yarn to group mapred and yarn
23-
user: name=yarn groups=hadoop,mapred,yarn state=present
24-
tags:
25-
- create-hadoop-user
26-
27-
- name: Add user mapred to group mapred
28-
user: name=mapred groups=hadoop,mapred state=present
29-
tags:
30-
- create-hadoop-user
31-
32-
- name: Add root to all groups
33-
user: name=root groups=hadoop,hdfs,yarn,mapred,httpfs state=present
6+
- name: Add user {{ user }}
7+
user: name={{ user }} group={{ group }} state=present
348
tags:
359
- create-hadoop-user

0 commit comments

Comments
 (0)