Skip to content

Instantly share code, notes, and snippets.

View bugcy013's full-sized avatar
🪄
Focusing

Dhanasekaran Anbalagan bugcy013

🪄
Focusing
View GitHub Profile
@bugcy013
bugcy013 / notes.txt
Created February 21, 2013 17:35 — forked from nipra/notes.txt
# Installing CDH4 on a Single Linux Node in Pseudo-distributed Mode
# https://ccp.cloudera.com/display/CDH4DOC/Installing+CDH4+on+a+Single+Linux+Node+in+Pseudo-distributed+Mode
# Installing CDH4 with MRv1 on a Single Linux Node in Pseudo-distributed mode
# On Ubuntu and other Debian systems
nipra@lambda:Downloads$ wget -cv http://archive.cloudera.com/cdh4/one-click-install/precise/amd64/cdh4-repository_1.0_all.deb
nipra@lambda:Downloads$ sudo dpkg -i cdh4-repository_1.0_all.deb # Adds /etc/apt/sources.list.d/cloudera-cdh4.list ??
nipra@lambda:Downloads$ dpkg -L cdh4-repository # To view the files on Ubuntu systems
# Install CDH4
0.check-selinux.log
1.install-repo-pkg.log
2.refresh-repo.log
3.install-oracle-j2sdk1.6.log
4.install daemons
sudo ufw enable
root@dhana:/etc/apt/sources.list.d# cat cloudera-cdh4.list
# For every web application directory under the current working directory,
# print the path of its WEB-INF/web.xml followed by one line per
# <servlet-mapping> entry, formatted as " <url-pattern> (<servlet-name>)".
import glob
import xml.dom.minidom

for f in glob.glob('*/WEB-INF/web.xml'):
    # Parenthesised single-argument print gives the same output on
    # Python 2 and Python 3.
    print(f)
    dom = xml.dom.minidom.parse(f)
    mappings = dom.getElementsByTagName('servlet-mapping')
    for m in mappings:
        # Each mapping is read via the text of its first <url-pattern> and
        # first <servlet-name> child elements.
        urlp = m.getElementsByTagName('url-pattern')[0].firstChild.data
        sname = m.getElementsByTagName('servlet-name')[0].firstChild.data
        print(" %s (%s)" % (urlp, sname))
from itertools import islice
import os
import sys
import subprocess
CMD = "sudo -u hdfs /usr/lib/hadoop/bin/hadoop dfsadmin -report"
def parse(resultstr):
dic = {}
for line in islice(resultstr, 0, 8):
#!/bin/bash
# Append a HADOOP_HEAPSIZE=<SIZE> line to the Hadoop user environment file.
# Usage: <script> [SIZE]
#   SIZE  value written after HADOOP_HEAPSIZE=; 2048 is used when the script
#         is called without any argument.
case $# in
  0) SIZE="2048" ;;
  *) SIZE=$1 ;;
esac
echo "HADOOP_HEAPSIZE=${SIZE}" >> /home/hadoop/conf/hadoop-user-env.sh
# Run `hadoop fsck` on / as the hdfs user with -files -blocks, and show every
# output line matching BAD_BLOCK_ID together with the 5 lines before it.
# NOTE(review): BAD_BLOCK_ID looks like a placeholder for a real block id — confirm.
sudo -u hdfs hadoop fsck / -files -blocks \
  | grep -B 5 BAD_BLOCK_ID
# Print one comma-separated line per `hadoop job -list` row containing "job_":
# field 1, an HH:MM:SS value, then fields 4 and 6 each wrapped in double quotes.
# Input rows are split on tabs (FS) and output fields are joined with commas (OFS).
# The HH:MM:SS value is the current time minus int(field 3 / 1000), so field 3
# is presumably the job start time in epoch milliseconds — confirm against the
# actual `hadoop job -list` output.
# NOTE(review): systime() and the three-argument strftime() (third argument = UTC
# flag) are gawk features; presumably this requires gawk — confirm on the target host.
hadoop job -list | grep job_ | awk 'BEGIN{FS="\t";OFS=","};{print $1,strftime("%H:%M:%S", (systime()-int($3/1000)),1),"\""$4"\"","\""$6"\""}'
@bugcy013
bugcy013 / gist:5074641
Last active December 14, 2015 10:49
Hadoop tips
Task tracker
http://172.16.30.128:50060
Task tracker metrics info
http://172.16.30.128:50060/metrics
Data node block scanner details
http://172.16.30.128:50075/blockScannerReport
Secondary namenode status
@bugcy013
bugcy013 / gist:5097700
Created March 6, 2013 08:44
TestDFSIO Error
research@dvcliftonhera227:/usr/lib/hadoop-0.20-mapreduce$ hadoop jar hadoop-test.jar TestDFSIO -read -nrFiles 10 -fileSize 1000
13/03/06 03:27:49 INFO fs.TestDFSIO: TestDFSIO.0.0.6
13/03/06 03:27:49 INFO fs.TestDFSIO: nrFiles = 10
13/03/06 03:27:49 INFO fs.TestDFSIO: fileSize (MB) = 1000.0
13/03/06 03:27:49 INFO fs.TestDFSIO: bufferSize = 1000000
13/03/06 03:27:49 INFO fs.TestDFSIO: baseDir = /benchmarks/TestDFSIO
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/lib/zookeeper/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/app_cloe/cloe_hadoop/cloe/deepvalue/slf4j-simple/1.5.8/slf4j-simple-1.5.8.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
root@Datanode-192-168-70-97:~# service cloudera-scm-server-db initdb
root@Datanode-192-168-70-97:~# /etc/init.d/cloudera-scm-server restart
cloudera-scm-server is already stopped
root@Datanode-192-168-70-97:~# service cloudera-scm-server restart
cloudera-scm-server is already stopped
root@Datanode-192-168-70-97:~# service cloudera-scm-server start
Starting cloudera-scm-server: * cloudera-scm-server started
root@Datanode-192-168-70-97:~# cd /var/log/cloudera-scm-server/
root@Datanode-192-168-70-97:/var/log/cloudera-scm-server# ls -ltr