This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.ivanchou; | |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.fs.*; | |
import org.apache.hadoop.hbase.HBaseConfiguration; | |
import org.apache.hadoop.hbase.HColumnDescriptor; | |
import org.apache.hadoop.hbase.HTableDescriptor; | |
import org.apache.hadoop.hbase.TableName; | |
import org.apache.hadoop.hbase.client.HBaseAdmin; | |
import org.apache.hadoop.hbase.client.HTable; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE dblp SYSTEM "dblp.dtd">
<dblp>
<article mdate="2011-01-11" key="journals/acta/Saxena96">
<author>Sanjeev Saxena</author>
<title>Parallel Integer Sorting and Simulation Amongst CRCW Models.</title>
<pages>607-619</pages>
<year>1996</year>
<volume>33</volume>
<journal>Acta Inf.</journal>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# filename: parse_dblp.py | |
# author: ivanchou | |
import codecs, os | |
import xml.etree.ElementTree as ET | |
# Tag names of the DBLP record types listed for this script.
# NOTE(review): presumably these are the XML elements the parser treats as
# papers -- the code that consumes this tuple is outside this view; confirm.
paper_tag = ('article', 'inproceedings', 'proceedings', 'book',
             'incollection', 'phdthesis', 'mastersthesis', 'www')
class AllEntities: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Create 5000 files named 1.data ... 5000.data in the current directory,
# each filled by dd from the file `input`.
#
# dd operands: bs is the block size and count is the number of blocks, so
# bs=1k count=1024 copies at most 1024 blocks of 1 KiB (1 MiB) per file.
for ((i = 1; i <= 5000; i++)); do
    # Quote the output path so the expansion is never word-split or globbed.
    dd if=input of="${i}.data" bs=1k count=1024
done