Created
April 13, 2016 00:30
-
-
Save crearo/dafd240bf9d1dffcb0c21611e77fea3b to your computer and use it in GitHub Desktop.
DA-IICT Resource Center Data Scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.Select;
public class ScrapeRC { | |
WebDriver driver; | |
ArrayList<BookIssue> allBooks; | |
ArrayList<String> loginUnsuccessful; | |
ArrayList<NumIssuedBook> numIssuedBooks; | |
public static void main(String[] args) { | |
ScrapeRC rc = new ScrapeRC(); | |
rc.allBooks = new ArrayList<ScrapeRC.BookIssue>(); | |
rc.loginUnsuccessful = new ArrayList<String>(); | |
rc.numIssuedBooks = new ArrayList<NumIssuedBook>(); | |
rc.setup(); | |
for (int i = 1; i <= 10; i++) { | |
String b = String.format("%03d", i); | |
rc.login("201301" + b); | |
} | |
for (int i = 401; i <= 459; i++) { | |
rc.login("201301" + i); | |
} | |
rc.driver.close(); | |
System.out.println("Fini"); | |
try { | |
File file = new File("/home/rish/rc-file.txt"); | |
if (!file.exists()) { | |
file.createNewFile(); | |
} | |
FileWriter fw = new FileWriter(file.getAbsoluteFile(), true); | |
BufferedWriter bw = new BufferedWriter(fw); | |
for (BookIssue bookIssue : rc.allBooks) | |
bw.write(bookIssue.toString() + ",\n"); | |
bw.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
try { | |
File file = new File("/home/rish/rc-numIssued.txt"); | |
if (!file.exists()) { | |
file.createNewFile(); | |
} | |
FileWriter fw = new FileWriter(file.getAbsoluteFile(), true); | |
BufferedWriter bw = new BufferedWriter(fw); | |
for (NumIssuedBook bookIssue : rc.numIssuedBooks) | |
bw.write(bookIssue.toString() + ",\n"); | |
bw.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
try { | |
File file = new File("/home/rish/rc-loginFailed.txt"); | |
if (!file.exists()) { | |
file.createNewFile(); | |
} | |
FileWriter fw = new FileWriter(file.getAbsoluteFile(), true); | |
BufferedWriter bw = new BufferedWriter(fw); | |
for (String bookIssue : rc.loginUnsuccessful) | |
bw.write(bookIssue.toString() + ",\n"); | |
bw.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
public void setup() { | |
driver = new FirefoxDriver(); | |
} | |
public boolean login(String id) { | |
try { | |
driver.navigate().to("http://resourcecentre.daiict.ac.in:8081/webslim/Login.asp?Logout=Yes"); | |
WebElement userName_editbox = driver.findElement(By.name("txtID")); | |
WebElement password_editbox = driver.findElement(By.name("txtPW")); | |
WebElement submit_button = driver.findElement(By.name("Action")); | |
userName_editbox.sendKeys(id); | |
password_editbox.sendKeys(id); | |
submit_button.click(); | |
if (driver.getCurrentUrl().equals("http://resourcecentre.daiict.ac.in:8081/webslim/Default.asp")) { | |
System.out.println("Logged in " + id); | |
getUserName(); | |
findBooks(id); | |
return true; | |
} else { | |
System.out.println("Unable to login " + id); | |
loginUnsuccessful.add(id); | |
return false; | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
return false; | |
} | |
} | |
public void getUserName() { | |
// System.out.println(driver.findElement(By.xpath("//*[@id='tblTab']/tbody/tr[2]/td[1]/font")).getText()); | |
} | |
public void findBooks(String uid) { | |
driver.navigate().to("http://resourcecentre.daiict.ac.in:8081/webslim/VuMyBooks.asp"); | |
driver.findElement(By.name("txtnDays")).sendKeys("7250"); | |
new Select(driver.findElement(By.name("MyList"))).selectByVisibleText("Detail Transaction"); | |
driver.findElement(By.name("DataAction")).click(); | |
if (driver.getPageSource().contains("Record(s) Hits")) { | |
int indexHits = driver.getPageSource().indexOf("Record(s) Hits"); | |
String subHits = driver.getPageSource().substring(indexHits); | |
String stringHits = subHits.substring("Record(s) Hits".length(), subHits.indexOf("</b>")); | |
stringHits = stringHits.replace("<b>", "").trim(); | |
int totalHits = Integer.parseInt(stringHits); | |
System.out.println("Total hits = " + totalHits); | |
numIssuedBooks.add(new NumIssuedBook(uid, totalHits)); | |
// parseAndSaveBooks(uid); | |
// int currentPage = 1; | |
// while (shouldGoNextPage(totalHits, currentPage)) { | |
// parseAndSaveBooks(uid); | |
// currentPage++; | |
// driver.findElement(By.xpath("/html/body/font/table/tbody/tr/td[2]/table[2]/tbody/tr/td[1]/form/input[3]")) | |
// .click(); | |
// } | |
} | |
} | |
public void parseAndSaveBooks(String uid) { | |
ArrayList<WebElement> elements = (ArrayList<WebElement>) driver.findElements(By | |
.xpath("/html/body/font/table/tbody/tr/td[2]/table[1]/tbody")); | |
String tableText = elements.get(0).getText(); | |
String lines[] = tableText.split("\\n"); | |
for (int i = 3; i < lines.length; i++) { | |
String[] splitSpace = lines[i].split(" "); | |
String date = "" + splitSpace[1]; | |
String accessionno = "" + splitSpace[2]; | |
String title = lines[i] | |
.substring(lines[i].indexOf(splitSpace[2]) + splitSpace[2].length(), lines[i].indexOf(".....")); | |
String type = "" + splitSpace[splitSpace.length - 1]; | |
String author = "" + lines[i].substring(lines[i].indexOf(".....") + 5); | |
author = author.replace(type, ""); | |
BookIssue bookIssue = new BookIssue(uid, "", date.trim(), accessionno.trim(), title.trim(), author.trim(), | |
type.trim()); | |
allBooks.add(bookIssue); | |
} | |
} | |
public boolean shouldGoNextPage(int totalHits, int currentPage) { | |
if (totalHits - currentPage * 15 > 0) { | |
return true; | |
} | |
return false; | |
} | |
private class BookIssue { | |
String userid, username, date, accessionno, title, author, type; | |
public BookIssue(String uid, String username, String date, String accessionno, String title, String author, String type) { | |
this.userid = uid; | |
this.username = username; | |
this.date = date; | |
this.accessionno = accessionno; | |
this.title = title; | |
this.author = author; | |
this.type = type; | |
} | |
@Override | |
public String toString() { | |
return "{\"userid\":" + userid + ", \"username\":\"" + username + "\", \"date\":\"" + date + "\", \"accessionno\":\"" | |
+ accessionno + "\", \"title\":\"" + title + "\", \"author\":\"" + author + "\", \"type\":\"" + type + "\"}"; | |
} | |
} | |
private class NumIssuedBook { | |
String id; | |
int bookNumber; | |
public NumIssuedBook(String id, int bookNumber) { | |
this.id = id; | |
this.bookNumber = bookNumber; | |
} | |
@Override | |
public String toString() { | |
return "{\"id\":" + id + ", \"bookNumber\":" + bookNumber + "}"; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment