Skip to content

Instantly share code, notes, and snippets.

View abshingate's full-sized avatar

abshingate abshingate

View GitHub Profile
/**
 * Minimal example class used as an introduction to gists.
 */
public class HelloGist {

    /** Text written by {@link #sayHello()}. */
    private static final String GREETING = "Hello gist";

    /**
     * Writes the greeting, followed by a line terminator, to standard output.
     */
    public static void sayHello() {
        System.out.println(GREETING);
    }
}
@abshingate
abshingate / clouderamavenrepository.xml
Created July 7, 2013 15:07
clouderamavenrepository.xml
<repositories>
<repository>
<id>cloudera-releases</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
@abshingate
abshingate / pigdependecy.xml
Last active December 19, 2015 10:49
pig dependency.xml
<!-- Hadoop core library, pinned to the 0.20.2-cdh3u6 build. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2-cdh3u6</version>
</dependency>
<!-- PigUnit artifact, pinned to the 0.10.0-cdh3u4 build. -->
<!-- NOTE(review): the CDH suffix here (cdh3u4) differs from hadoop-core above (cdh3u6); confirm the mix is intended. -->
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pigunit</artifactId>
<version>0.10.0-cdh3u4</version>
</dependency>
@abshingate
abshingate / wordcount.pig
Last active December 19, 2015 10:58
wordcount.pig
-- Word count: emits one (count, word) tuple per distinct token found in the input.

-- Read the input file; no loader or schema is declared, so fields are addressed by position.
A = load 'src/main/resources/sample.data';
-- Cast the first field to chararray, tokenize it, and flatten so each token becomes its own record named "word".
B = foreach A generate flatten(TOKENIZE((chararray)$0)) as word;
-- Collect identical words into one group per distinct word.
C = group B by word;
-- For each group, output the number of occurrences followed by the word itself (the group key).
D = foreach C generate COUNT(B), group;
-- Print the contents of D.
dump D;
@abshingate
abshingate / sample.data
Last active December 19, 2015 11:19
sample.data
Johny, Johny!
Yes, Papa
Eating sugar?
No, Papa
Telling lies?
No, Papa
Open your mouth!
Ha! Ha! Ha!
@abshingate
abshingate / AppTest.java
Created July 8, 2013 15:21
AppTest.java
/**
 * Runs the Pig script at {@code src/main/resources/wordcount.pig} and asserts
 * that alias {@code D} yields exactly the expected tuples, in the listed order.
 *
 * @throws Exception if constructing the {@code PigTest} or the assertion call throws
 */
public void testStudentsPigScript() throws Exception {
    // Expected tuples for alias D, passed to assertOutput in this order.
    final String[] expectedRows = {
        "(2,No)",
        "(3,Ha!)",
        "(1,Yes)",
        "(1,Open)",
        "(3,Papa)",
        "(1,your)",
        "(1,Johny)",
        "(1,lies?)",
        "(1,Eating)",
        "(1,Johny!)",
        "(1,mouth!)",
        "(1,sugar?)",
        "(1,Telling)",
    };

    final PigTest wordCountScript = new PigTest("src/main/resources/wordcount.pig");
    wordCountScript.assertOutput("D", expectedRows);
}
@abshingate
abshingate / userDefineLang_ApachePig.xml
Last active April 2, 2020 17:35
Apache Pig Language for Notepad++
<NotepadPlus>
<UserLang name="Apache PIG" ext="pig" udlVersion="2.1">
<Settings>
<Global caseIgnored="yes" allowFoldOfComments="yes" foldCompact="no" forcePureLC="0" decimalSeparator="0" />
<Prefix Keywords1="no" Keywords2="no" Keywords3="no" Keywords4="no" Keywords5="no" Keywords6="no" Keywords7="no" Keywords8="no" />
</Settings>
<KeywordLists>
<Keywords name="Comments">00-- 01 02 03/* 04*/</Keywords>
<Keywords name="Numbers, prefix1"></Keywords>
<Keywords name="Numbers, prefix2"></Keywords>
-bash-3.2$ echo -e "mark||2011\ntony|Mr.|2012\nrichard||2013" > input.txt
-bash-3.2$ cat input.txt
mark||2011
tony|Mr.|2012
richard||2013
-bash-3.2$ hadoop fs -copyFromLocal input.txt /user/abhijit/input.txt
-bash-3.2$ hadoop fs -cat /user/abhijit/input.txt
mark||2011
tony|Mr.|2012
richard||2013
-- Declares an external table "person" with three string columns (name, title, birth_year).
-- Rows are stored as delimited text with '|' as the field separator.
-- NOTE(review): LOCATION is '/person', while the sample file shown earlier was copied to
-- /user/abhijit/input.txt; confirm the data is placed under /person before querying.
CREATE EXTERNAL TABLE person
(
name string,
title string,
birth_year string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
LOCATION '/person';