Created
November 11, 2014 01:41
-
-
Save OneRaynyDay/2d8d8fa7e23c3203547a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cs_1c; | |
//class EBookEntry ----------------------------------------------------- | |
public class EBookEntry implements Comparable<EBookEntry> | |
{ | |
private String title, creator, subject; | |
private int eTextNum; | |
public static final int MIN_STRING = 1; | |
public static final int MAX_STRING = 300; | |
public static final int MAX_ENTRY_LENGTH = 10000; | |
public static final int MAX_ID = 100000; | |
// comparable tools | |
public static final int SORT_BY_TITLE = 0; | |
public static final int SORT_BY_CREATOR = 1; | |
public static final int SORT_BY_SUBJECT = 2; | |
public static final int SORT_BY_ID = 3; | |
private static int sortKey = SORT_BY_CREATOR; | |
// default constructor | |
EBookEntry() | |
{ | |
title = ""; | |
creator = ""; | |
subject = ""; | |
eTextNum = 0; | |
} | |
// accessors | |
public String getTitle() { return title; } | |
public String getCreator() { return creator; } | |
public String getSubject() { return subject; } | |
public int getETextNum() { return eTextNum; } | |
// mutators | |
public boolean setTitle(String strArg) | |
{ | |
if ( !validString(strArg) ) | |
return false; | |
title = strArg; | |
return true; | |
} | |
public boolean setCreator(String strArg) | |
{ | |
if ( !validString(strArg) ) | |
return false; | |
creator = strArg; | |
return true; | |
} | |
public boolean setSubject(String strArg) | |
{ | |
if ( !validString(strArg) ) | |
return false; | |
subject = strArg; | |
return true; | |
} | |
public boolean setEtextNum(int intArg) | |
{ | |
if (intArg < 1 || intArg > MAX_ID) | |
return false; | |
eTextNum = intArg; | |
return true; | |
} | |
public static boolean setSortType( int whichType ) | |
{ | |
switch (whichType) | |
{ | |
case SORT_BY_TITLE: | |
case SORT_BY_CREATOR: | |
case SORT_BY_SUBJECT: | |
case SORT_BY_ID: | |
sortKey = whichType; | |
return true; | |
default: | |
return false; | |
} | |
} | |
public int compareTo(EBookEntry other) | |
{ | |
switch (sortKey) | |
{ | |
case SORT_BY_TITLE: | |
return (title.compareToIgnoreCase(other.title)); | |
case SORT_BY_CREATOR: | |
return (creator.compareToIgnoreCase(other.creator)); | |
case SORT_BY_SUBJECT: | |
return (subject.compareToIgnoreCase(other.subject)); | |
case SORT_BY_ID: | |
return (eTextNum - other.eTextNum); | |
default: | |
return 0; | |
} | |
} | |
public String toString() | |
{ | |
return " # " + eTextNum + " ---------------\n" | |
+ " \"" + title + "\"\n" | |
+ " by " + creator + "\n" | |
+ " re: " + subject + "\n\n"; | |
} | |
// utility for testing all String mutator args | |
private static boolean validString(String strArg) | |
{ | |
if (strArg == null) | |
return false; | |
if (strArg.length() < MIN_STRING || strArg.length() > MAX_STRING) | |
return false; | |
return true; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cs_1c; | |
import java.io.*; | |
import java.util.*; | |
//class EBookEntryReader ----------------------------------------------------- | |
public class EBookEntryReader | |
{ | |
ArrayList<EBookEntry> books = new ArrayList<EBookEntry>(); | |
private int numBooks; | |
private boolean fileOpenError; | |
private String bookFile; | |
// constructor | |
public EBookEntryReader(String fileName) | |
{ | |
EBookEntry book; | |
BufferedReader inFile; | |
String line, entryString; | |
numBooks = 0; | |
fileOpenError = false; | |
bookFile = "NO FILE NAME PROVIDED"; | |
if (fileName.length() == 0) | |
{ | |
fileOpenError = true; | |
return; | |
} | |
bookFile = fileName; | |
// open file for reading | |
try | |
{ | |
// ------- open and read the file | |
inFile = new BufferedReader( | |
new FileReader(fileName) ); | |
while ( inFile.ready() ) | |
{ | |
line = inFile.readLine(); | |
if (isDataLine(line)) | |
{ | |
entryString = readOneEntry(inFile, line); // expands line to entry | |
if (entryString == null) | |
{ | |
fileOpenError = true; | |
break; | |
} | |
// if not an English entry, we'll have prob with chars | |
if ( !entryString.contains( ">en<" ) ) | |
continue; | |
book = readOneBook(entryString); | |
books.add(book); | |
numBooks++; | |
} | |
} | |
inFile.close(); | |
} | |
catch( FileNotFoundException e) | |
{ | |
fileOpenError = true; | |
} | |
catch( IOException e) | |
{ | |
fileOpenError = true; | |
} | |
} | |
// accessors | |
public EBookEntry getBook(int k) | |
{ | |
if (k < 0 || k >= numBooks) | |
return new EBookEntry(); // dummy return | |
return books.get(k); | |
} | |
public String getFileName() { return bookFile; } | |
public boolean readError() { return fileOpenError; } | |
public int getNumBooks() { return numBooks; } | |
// helpers | |
// reads lines from the input stream, concatenating until it finds | |
// the terminating tag, "</pgterms:etext>". Result is a single long | |
// line containing entire record wrapped in | |
// "<pgterms:etext> ... </pgterms:etext>" which it returns to client. | |
// assumes first line containing <pgterms:etext> is already in | |
// line parameter - required for call | |
private String readOneEntry(BufferedReader infile, String line) | |
{ | |
String fullEntry = line; | |
String nextLine = ""; | |
try | |
{ | |
while ( infile.ready() | |
&& fullEntry.length() < EBookEntry.MAX_ENTRY_LENGTH - 20 ) | |
{ | |
nextLine = infile.readLine(); | |
fullEntry += nextLine; | |
if ( nextLine.equals("</pgterms:etext>") ) | |
break; | |
} | |
} | |
catch (IOException e) | |
{ | |
return null; | |
} | |
// if we have an unterminated entry, force it to be terminated. | |
if ( !nextLine.equals("</pgterms:etext>") ) | |
fullEntry += "</pgterms:etext>"; | |
return fullEntry; | |
} | |
// reads one string in the above record (all lines of the record | |
// are assumed to be concatenated into a single line prior to | |
// this call surrounded by <pgterms:etext> ... </pgterms:etext> ) | |
// and leaves data in an EBookEntry object for return to client | |
private EBookEntry readOneBook(String entryString) | |
{ | |
EBookEntry book = new EBookEntry(); | |
book.setEtextNum(readIDFromLine(entryString)); | |
book.setCreator(readStringFromEntry(entryString, "<dc:creator")); | |
book.setTitle(readStringFromEntry(entryString, "<dc:title")); | |
book.setSubject(readStringFromEntry(entryString, "<dc:subject")); | |
return book; | |
} | |
// helpers | |
private boolean isDataLine(String line) | |
{ | |
if (line.length() < 15) | |
return false; | |
// check for a line of the form "<pgterms:etext --- " | |
if ( line.substring(0,14).equals("<pgterms:etext") ) | |
return true; | |
return false; | |
} | |
int readIDFromLine(String line) | |
{ | |
int startNumPos; | |
int numLength; | |
startNumPos = line.indexOf("ID=\"etext") + 9; | |
numLength = line.substring(startNumPos).indexOf( "\""); | |
if (startNumPos < 0 || startNumPos > EBookEntry.MAX_STRING | |
|| numLength < 0 || numLength > 20 ) | |
return 0; | |
else | |
return Integer.valueOf(line.substring(startNumPos, | |
startNumPos + numLength)); | |
} | |
String readStringFromEntry (String entryString, String delimiter) | |
{ | |
int start, stop, k; | |
String stringAfterDelimiter; | |
if (delimiter.length() < 1 || entryString.length() < delimiter.length()) | |
return "(no data found)"; | |
// first find "<dc:title", e.g. | |
start = entryString.indexOf(delimiter); | |
if (start < 0) | |
return "(no data found)"; | |
stringAfterDelimiter = entryString.substring(start+delimiter.length()); | |
// we're looking for something like ">A ..." rather than ">< ... " | |
for (k = 0; k < stringAfterDelimiter.length() - 1; k++) | |
{ | |
// rather than using isLetter() we check manually to avoid foreign | |
if (stringAfterDelimiter.charAt(k) == '>' | |
&& | |
// home-made isLetter() | |
( | |
(stringAfterDelimiter.charAt(k+1) >='a' | |
&& stringAfterDelimiter.charAt(k+1) <= 'z') | |
|| | |
(stringAfterDelimiter.charAt(k+1) >='A' | |
&& stringAfterDelimiter.charAt(k+1) <= 'Z') | |
) | |
) | |
break; | |
} | |
if (k == stringAfterDelimiter.length() - 1) | |
return "(no data found)"; | |
// we've got the starting position for the raw data | |
start = k + 1; | |
stringAfterDelimiter = stringAfterDelimiter.substring(start); // cut tags | |
stop = stringAfterDelimiter.indexOf("<"); // by above condition, cannot be 0 | |
return stringAfterDelimiter.substring(0, stop); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cs_1c; | |
import java.util.*; | |
// FHhashQP class -------------------------------------------- | |
public class FHhashQP<E> | |
{ | |
protected static final int ACTIVE = 0; | |
protected static final int EMPTY = 1; | |
protected static final int DELETED = 2; | |
static final int INIT_TABLE_SIZE = 97; | |
static final double INIT_MAX_LAMBDA = 0.49; | |
protected HashEntry<E>[] mArray; | |
protected int mSize; | |
protected int mLoadSize; | |
protected int mTableSize; | |
protected double mMaxLambda; | |
// public methods --------------------------------- | |
public FHhashQP(int tableSize) | |
{ | |
mLoadSize = mSize = 0; | |
if (tableSize < INIT_TABLE_SIZE) | |
mTableSize = INIT_TABLE_SIZE; | |
else | |
mTableSize = nextPrime(tableSize); | |
allocateArray(); // uses mTableSize; | |
mMaxLambda = INIT_MAX_LAMBDA; | |
} | |
public FHhashQP() | |
{ | |
this(INIT_TABLE_SIZE); | |
} | |
public boolean insert( E x) | |
{ | |
int bucket = findPos(x); | |
if ( mArray[bucket].state == ACTIVE ) | |
return false; | |
mArray[bucket].data = x; | |
mArray[bucket].state = ACTIVE; | |
mSize++; | |
// check load factor | |
if( ++mLoadSize > mMaxLambda * mTableSize ) | |
rehash(); | |
return true; | |
} | |
public boolean remove( E x ) | |
{ | |
int bucket = findPos(x); | |
if ( mArray[bucket].state != ACTIVE ) | |
return false; | |
mArray[bucket].state = DELETED; | |
mSize--; // mLoadSize not dec'd because it counts any non-EMP location | |
return true; | |
} | |
public boolean contains(E x ) | |
{ | |
return mArray[findPos(x)].state == ACTIVE; | |
} | |
public int size() { return mSize; } | |
void makeEmpty() | |
{ | |
int k, size = mArray.length; | |
for(k = 0; k < size; k++) | |
mArray[k].state = EMPTY; | |
mSize = mLoadSize = 0; | |
} | |
public boolean setMaxLambda( double lam ) | |
{ | |
if (lam < .1 || lam > INIT_MAX_LAMBDA ) | |
return false; | |
mMaxLambda = lam; | |
return true; | |
} | |
// protected methods of class ---------------------- | |
int findPos( E x ) | |
{ | |
int kthOddNum = 1; | |
int index = myHash(x); | |
while ( mArray[index].state != EMPTY | |
&& !mArray[index].data.equals(x) ) | |
{ | |
index += kthOddNum; // k squared = (k-1) squared + kth odd # | |
kthOddNum += 2; // compute next odd # | |
if ( index >= mTableSize ) | |
index -= mTableSize; | |
} | |
return index; | |
} | |
protected void rehash() | |
{ | |
// we save old list and size then we can reallocate freely | |
HashEntry<E>[] oldArray = mArray; | |
int k, oldTableSize = mTableSize;; | |
mTableSize = nextPrime(2*oldTableSize); | |
// allocate a larger, empty array | |
allocateArray(); // uses mTableSize; | |
// use the insert() algorithm to re-enter old data | |
mSize = mLoadSize = 0; | |
for(k = 0; k < oldTableSize; k++) | |
if (oldArray[k].state == ACTIVE) | |
insert( oldArray[k].data ); | |
} | |
protected int myHash(E x) | |
{ | |
int hashVal; | |
hashVal = x.hashCode() % mTableSize; | |
if(hashVal < 0) | |
hashVal += mTableSize; | |
return hashVal; | |
} | |
protected static int nextPrime(int n) | |
{ | |
int k, candidate, loopLim; | |
// loop doesn't work for 2 or 3 | |
if (n <= 2 ) | |
return 2; | |
else if (n == 3) | |
return 3; | |
for (candidate = (n%2 == 0)? n+1 : n ; true ; candidate += 2) | |
{ | |
// all primes > 3 are of the form 6k +/- 1 | |
loopLim = (int)( (Math.sqrt((double)candidate) + 1)/6 ); | |
// we know it is odd. check for divisibility by 3 | |
if (candidate%3 == 0) | |
continue; | |
// now we can check for divisibility of 6k +/- 1 up to sqrt | |
for (k = 1; k <= loopLim; k++) | |
{ | |
if (candidate % (6*k - 1) == 0) | |
break; | |
if (candidate % (6*k + 1) == 0) | |
break; | |
} | |
if (k > loopLim) | |
return candidate; | |
} | |
} | |
void allocateArray() | |
{ | |
int k; | |
mArray = new HashEntry[mTableSize]; | |
for (k = 0; k < mTableSize; k++) | |
mArray[k] = new HashEntry<E>(); | |
} | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.NoSuchElementException; | |
import cs_1c.*; | |
public class FHhashQPwFind<KeyType, E extends Comparable<KeyType>> extends | |
FHhashQP<E> { | |
public E find(KeyType key) throws NoSuchElementException { | |
E queriedObj = mArray[findPosKey(key)].data; | |
if (queriedObj != null) { | |
return queriedObj; | |
} else | |
throw new NoSuchElementException(); | |
} | |
int myHashKey(KeyType key) { | |
int hashVal; | |
hashVal = key.hashCode() % mTableSize; | |
if (hashVal < 0) | |
hashVal += mTableSize; | |
return hashVal; | |
} | |
int findPosKey(KeyType key) { | |
int kthOddNum = 1; | |
int index = myHashKey(key); | |
while (mArray[index].state != EMPTY | |
&& mArray[index].data.compareTo(key) != 0) { | |
index += kthOddNum; // k squared = (k-1) squared + kth odd # | |
kthOddNum += 2; // compute next odd # | |
if (index >= mTableSize) | |
index -= mTableSize; | |
} | |
return index; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.NoSuchElementException; | |
import cs_1c.EBookEntry; | |
import cs_1c.EBookEntryReader; | |
import java.io.IOException; | |
import java.util.Random; | |
// ----------- wrapper classes ------------- | |
class EBookCompInt implements Comparable<Integer> { | |
EBookEntry data; | |
public EBookCompInt(EBookEntry eBook) { | |
data = eBook; | |
} | |
public int compareTo(Integer textNum) { | |
return data.getETextNum() - textNum; | |
} | |
public boolean equals(EBookCompInt eBook) { | |
return data.equals(eBook.data); | |
} | |
public int hashCode() { | |
return data.getETextNum(); | |
} | |
} | |
class EBookCompString implements Comparable<String> { | |
public static final int HASH_BY_TITLE = 0; | |
public static final int HASH_BY_CREATOR = 1; | |
public static final int HASH_BY_SUBJECT = 2; | |
int hashState = HASH_BY_TITLE; | |
EBookEntry data; | |
public EBookCompString(EBookEntry eBook) { | |
data = eBook; | |
} | |
public int compareTo(String textString) { | |
switch (hashState) { | |
case HASH_BY_TITLE: | |
return data.getTitle().compareTo(textString); | |
case HASH_BY_CREATOR: | |
return data.getCreator().compareTo(textString); | |
case HASH_BY_SUBJECT: | |
return data.getSubject().compareTo(textString); | |
} | |
// this should not happen | |
return -1; | |
} | |
public void setState(int state) { | |
hashState = state; | |
} | |
public boolean equals(EBookCompInt eBook) { | |
return data.equals(eBook.data); | |
} | |
public int hashCode() { | |
switch (hashState) { | |
case HASH_BY_TITLE: | |
return data.getTitle().hashCode(); | |
case HASH_BY_CREATOR: | |
return data.getCreator().hashCode(); | |
case HASH_BY_SUBJECT: | |
return data.getSubject().hashCode(); | |
} | |
// should not happen | |
return -1; | |
} | |
} | |
// ------------------------------------------------------ | |
public class Main { | |
public static final int NUM_RANDOM_INDICES = 25; | |
// ------- main -------------- | |
public static void main(String[] args) throws Exception { | |
// create a QP hash table of EBooks .. | |
FHhashQPwFind<Integer, EBookCompInt> hashTableInt = new FHhashQPwFind<Integer, EBookCompInt>(); | |
FHhashQPwFind<String, EBookCompString> hashTableTitle = new FHhashQPwFind<String, EBookCompString>(); | |
FHhashQPwFind<String, EBookCompString> hashTableCreator = new FHhashQPwFind<String, EBookCompString>(); | |
FHhashQPwFind<String, EBookCompString> hashTableSubject = new FHhashQPwFind<String, EBookCompString>(); | |
// get book array | |
EBookEntry[] bookEntries = getDataFromFile("catalog-short4.txt", | |
hashTableInt, hashTableTitle, hashTableCreator, hashTableSubject); | |
Random rand = new Random(); | |
// display NUM_RANDOM_INDICES books from array, should find the book | |
// successfully | |
System.out.println("Display NUM_RANDOM_INDICES books from array"); | |
System.out.println(); | |
for (int k = 0; k < NUM_RANDOM_INDICES; k++) { | |
int randomBookNumber = rand.nextInt(bookEntries.length); | |
EBookEntry book = bookEntries[randomBookNumber]; | |
System.out.println(book); | |
// find book by text number | |
System.out.println("------FINDING BY TEXT NUMBER------"); | |
int number = book.getETextNum(); | |
try { | |
EBookCompInt eCompInt = hashTableInt.find(number); | |
if (eCompInt.data.getETextNum() == number) | |
System.out.println("Successfully find book text number#: " | |
+ number); | |
else | |
System.out.println("Not found book text number#: " + number); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book text number#: " + number); | |
} | |
System.out.println(); | |
// find book by title | |
System.out | |
.println("------FINDING BY TEXT TITLE, CREATOR AND SUBJECT------"); | |
String title = book.getTitle(); | |
try { | |
EBookCompString eCompStr = hashTableTitle.find(title); | |
if (eCompStr.data.getTitle().equals(title)) | |
System.out.println("Successfully find book title: " + title); | |
else | |
System.out.println("Not found book title: " + number); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book title: " + title); | |
} | |
System.out.println(); | |
// find book by creator | |
String creator = book.getCreator(); | |
try { | |
EBookCompString eCompStr = hashTableCreator.find(creator); | |
if (eCompStr.data.getCreator().equals(creator)) | |
System.out.println("Successfully find book creator: " + creator); | |
else | |
System.out.println("Not found book creator: " + creator); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book creator: " + creator); | |
} | |
System.out.println(); | |
// find book by subject | |
String subject = book.getSubject(); | |
try { | |
EBookCompString eCompStr = hashTableSubject.find(subject); | |
if (eCompStr.data.getSubject().equals(subject)) | |
System.out.println("Successfully find book subject: " + subject); | |
else | |
System.out.println("Not found book subject: " + subject); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book subject: " + subject); | |
} | |
System.out.println(); | |
} | |
// test known failures exceptions: | |
System.out.println("------ERROR TESTING-------"); | |
try { | |
hashTableInt.find(-3); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book text number# -3"); | |
} | |
try { | |
hashTableTitle.find("Jack Kerouac"); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book title Jack Kerouac"); | |
} | |
try { | |
hashTableCreator.find("Jack creator"); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book creator Jack creator"); | |
} | |
try { | |
hashTableSubject.find("Jack subject"); | |
} catch (NoSuchElementException e) { | |
System.out.println("Not found book subject Jack subject"); | |
} | |
} | |
private static EBookEntry[] getDataFromFile(String filename, | |
FHhashQPwFind<Integer, EBookCompInt> hashTableInt, | |
FHhashQPwFind<String, EBookCompString> hashTableTitle, | |
FHhashQPwFind<String, EBookCompString> hashTableCreator, | |
FHhashQPwFind<String, EBookCompString> hashTableSubject) | |
throws IOException { | |
EBookEntryReader bookInput = new EBookEntryReader(filename); | |
int arraySize; | |
// how we test the success of the read: | |
if (bookInput.readError()) { | |
System.out.println("couldn't open " + bookInput.getFileName() | |
+ " for input."); | |
throw new IOException(); | |
} | |
System.out.println("Successfully read from " + bookInput.getFileName()); | |
// create an array of objects for our own use: | |
arraySize = bookInput.getNumBooks(); | |
EBookEntry[] bookEntries = new EBookEntry[arraySize]; | |
for (int k = 0; k < arraySize; k++) { | |
EBookEntry ebook = bookInput.getBook(k); | |
bookEntries[k] = ebook; | |
EBookCompInt eCompInt = new EBookCompInt(ebook); | |
hashTableInt.insert(eCompInt); | |
EBookCompString eCompString = new EBookCompString(ebook); | |
eCompString.setState(EBookCompString.HASH_BY_TITLE); | |
hashTableTitle.insert(eCompString); | |
eCompString = new EBookCompString(ebook); | |
eCompString.setState(EBookCompString.HASH_BY_CREATOR); | |
hashTableCreator.insert(eCompString); | |
eCompString = new EBookCompString(ebook); | |
eCompString.setState(EBookCompString.HASH_BY_SUBJECT); | |
hashTableSubject.insert(eCompString); | |
} | |
return bookEntries; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment