Skip to content

Instantly share code, notes, and snippets.

@silviud
Last active December 16, 2015 00:19
Show Gist options
  • Save silviud/5346358 to your computer and use it in GitHub Desktop.
Hadoop Cloudera Cluster with HA for NameNode and JobTracker
File - core-site.xml
---------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

  <!-- Default filesystem URI. Uses the logical HA nameservice ID
       ("Cluster01", defined in hdfs-site.xml), not a host:port, so
       clients fail over between NameNodes transparently.
       NOTE(review): fs.default.name is the deprecated key; newer
       Hadoop prefers fs.defaultFS (the old key still works). -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://Cluster01</value>
  </property>

  <!-- ZooKeeper ensemble used by the ZKFC daemons for automatic
       NameNode failover (see dfs.ha.automatic-failover.enabled). -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop-nn01:2181,hadoop-nn02:2181,hadoop-nn03:2181</value>
  </property>

  <!-- Base for Hadoop's temporary/working files on the local disk. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/data/hadoop-tmp</value>
  </property>

</configuration>
File - hdfs-site.xml
--------------------
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

  <!-- This key is not used in Cloudera (kept for reference only). -->
  <property>
    <name>dfs.namenode.dir</name>
    <value>/opt/data/hdfs/1/namenode,/opt/data/hdfs/2/namenode</value>
  </property>

  <!-- Local directories where the NameNode stores fsimage/edits;
       two disks listed for redundancy. This is the key Cloudera uses. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///opt/data/hdfs/1/namenode,file:///opt/data/hdfs/2/namenode</value>
  </property>

  <!-- Local directories where DataNodes store block data. -->
  <property>
    <name>dfs.data.dir</name>
    <value>/opt/data/hdfs/1/datanode,/opt/data/hdfs/2/datanode</value>
  </property>

  <!-- Enable the WebHDFS REST API. -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>

  <!-- Group whose members are treated as HDFS superusers. -->
  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hadoop</value>
  </property>

  <!-- Automatic NameNode failover via ZKFC + the ZooKeeper quorum below. -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop-nn01:2181,hadoop-nn02:2181,hadoop-nn03:2181</value>
  </property>

  <!-- Logical nameservice ID and its two NameNodes. -->
  <property>
    <name>dfs.nameservices</name>
    <value>Cluster01</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.Cluster01</name>
    <value>nn1,nn2</value>
  </property>

  <!-- Per-NameNode RPC and HTTP addresses. -->
  <property>
    <name>dfs.namenode.rpc-address.Cluster01.nn1</name>
    <value>hadoop-nn01:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.Cluster01.nn2</name>
    <value>hadoop-nn02:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.Cluster01.nn1</name>
    <value>hadoop-nn01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.Cluster01.nn2</name>
    <value>hadoop-nn02:50070</value>
  </property>

  <!-- Custom shell fencing script invoked during failover; Hadoop
       substitutes the $target_* variables for the node being fenced. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/opt/ubisoft/hadoop/scripts/ha/ha.sh $target_address $target_nameserviceid $target_namenodeid)</value>
  </property>

  <!-- Shared edits directory on NFS, visible to both NameNodes. -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>file:///opt/data/hdfs/shared_nfs/namenode</value>
  </property>

  <!-- Client-side failover proxy for this nameservice.
       FIX: the key suffix must match the nameservice ID ("Cluster01");
       the original said ".mycluster", so clients never found a proxy
       provider for hdfs://Cluster01. -->
  <property>
    <name>dfs.client.failover.proxy.provider.Cluster01</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

</configuration>
File - mapred-site.xml
-----------------------
<?xml version="1.0" encoding="UTF-8"?>
<configuration>

  <!-- ID of the JobTracker this daemon instance runs as
       (host:port strings are replaced with a logical name). -->
  <property>
    <name>mapred.ha.jobtracker.id</name>
    <value>jt1</value>
  </property>

  <!-- Logical JobTracker nameservice; clients connect to this name
       and fail over between jt1/jt2. -->
  <property>
    <name>mapred.job.tracker</name>
    <value>Cluster01</value>
  </property>

  <property>
    <name>mapred.jobtrackers.Cluster01</name>
    <value>jt1,jt2</value>
    <description>Comma-separated list of JobTracker IDs.</description>
  </property>

  <!-- Per-JobTracker client RPC addresses. -->
  <property>
    <name>mapred.jobtracker.rpc-address.Cluster01.jt1</name>
    <value>hadoop-nn01:8021</value>
  </property>
  <property>
    <name>mapred.jobtracker.rpc-address.Cluster01.jt2</name>
    <value>hadoop-nn02:8021</value>
  </property>

  <!-- HTTP bind addresses (different ports: both JTs may share a host). -->
  <property>
    <name>mapred.job.tracker.http.address.Cluster01.jt1</name>
    <value>0.0.0.0:50030</value>
  </property>
  <property>
    <name>mapred.job.tracker.http.address.Cluster01.jt2</name>
    <value>0.0.0.0:50031</value>
  </property>

  <!-- HA-daemon RPC addresses (used by the failover controllers). -->
  <property>
    <name>mapred.ha.jobtracker.rpc-address.Cluster01.jt1</name>
    <value>hadoop-nn01:8023</value>
  </property>
  <property>
    <name>mapred.ha.jobtracker.rpc-address.Cluster01.jt2</name>
    <value>hadoop-nn02:8023</value>
  </property>

  <!-- HTTP redirect targets: where the standby sends browsers for each JT.
       FIX: the original had a stray duplicate <property> open tag here,
       which made the file malformed XML. -->
  <property>
    <name>mapred.ha.jobtracker.http-redirect-address.Cluster01.jt1</name>
    <value>hadoop-nn01:50030</value>
  </property>
  <!-- FIX: original pointed jt2 at hadoop-nn01:50030 (a copy-paste of
       jt1's value); jt2 runs on hadoop-nn02 and binds HTTP on 50031. -->
  <property>
    <name>mapred.ha.jobtracker.http-redirect-address.Cluster01.jt2</name>
    <value>hadoop-nn02:50031</value>
  </property>

  <!-- Recover running jobs after a JobTracker restart/failover. -->
  <property>
    <name>mapred.jobtracker.restart.recover</name>
    <value>true</value>
  </property>
  <property>
    <name>mapred.job.tracker.persist.jobstatus.active</name>
    <value>true</value>
  </property>
  <property>
    <name>mapred.job.tracker.persist.jobstatus.hours</name>
    <value>1</value>
  </property>
  <property>
    <name>mapred.job.tracker.persist.jobstatus.dir</name>
    <value>/opt/data/hdfs/mapred_jobinfo</value>
  </property>

  <!-- Client-side failover proxy and retry policy for the nameservice. -->
  <property>
    <name>mapred.client.failover.proxy.provider.Cluster01</name>
    <value>org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>mapred.client.failover.max.attempts</name>
    <value>15</value>
  </property>
  <property>
    <name>mapred.client.failover.sleep.base.millis</name>
    <value>500</value>
  </property>
  <property>
    <name>mapred.client.failover.sleep.max.millis</name>
    <value>1500</value>
  </property>
  <property>
    <name>mapred.client.failover.connection.retries</name>
    <value>0</value>
  </property>
  <property>
    <name>mapred.client.failover.connection.retries.on.timeouts</name>
    <value>0</value>
  </property>

  <!-- Not part of the original xml: JobTracker automatic failover.
       Fencing is a no-op (/bin/true) because job state is persisted
       to the shared jobstatus dir above. -->
  <property>
    <name>mapred.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
  </property>
  <property>
    <name>mapred.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>mapred.ha.zkfc.port</name>
    <value>8018</value>
    <!-- Pick a different port for each failover controller when
         running more than one on the same machine. -->
  </property>

</configuration>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment