Skip to content

Instantly share code, notes, and snippets.

@crearo
Created April 13, 2016 00:30
Show Gist options
  • Save crearo/dafd240bf9d1dffcb0c21611e77fea3b to your computer and use it in GitHub Desktop.
Save crearo/dafd240bf9d1dffcb0c21611e77fea3b to your computer and use it in GitHub Desktop.
DA-IICT Resource Center Data Scraping
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.Select;
/**
 * Selenium-driven scraper for the DA-IICT Resource Centre web catalogue.
 *
 * <p>For each student id it logs in (submitting the id as both user name and
 * password), opens the "my books" page, reads the total number of transaction
 * records reported by the page and stores it. After all ids have been visited
 * the collected results are appended to three text files.
 *
 * <p>NOTE(review): this logs in to other people's accounts; only run it with
 * explicit authorisation from the account owners / the service operator.
 *
 * <p>The class is not thread-safe: one {@link WebDriver} and the result lists
 * are shared mutable state.
 */
public class ScrapeRC {

    private static final String LOGIN_URL =
            "http://resourcecentre.daiict.ac.in:8081/webslim/Login.asp?Logout=Yes";
    private static final String HOME_URL =
            "http://resourcecentre.daiict.ac.in:8081/webslim/Default.asp";
    private static final String MY_BOOKS_URL =
            "http://resourcecentre.daiict.ac.in:8081/webslim/VuMyBooks.asp";

    /** Text that precedes the record count in the result page source. */
    private static final String HITS_MARKER = "Record(s) Hits";

    /** Run of dots that separates the title from the author in a result row. */
    private static final String TITLE_AUTHOR_SEPARATOR = ".....";

    /** Number of records assumed per result page by {@link #shouldGoNextPage}. */
    private static final int PAGE_SIZE = 15;

    /** Index of the first table line that is parsed; earlier lines are presumably headers. */
    private static final int FIRST_DATA_LINE = 3;

    /** Directory the result files go to when no directory is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "/home/rish";

    WebDriver driver;
    // Initialised eagerly so login()/findBooks() also work when the instance
    // was not created through main().
    ArrayList<BookIssue> allBooks = new ArrayList<BookIssue>();
    ArrayList<String> loginUnsuccessful = new ArrayList<String>();
    ArrayList<NumIssuedBook> numIssuedBooks = new ArrayList<NumIssuedBook>();

    /**
     * Scrapes ids 201301001-201301010 and 201301401-201301459, then appends the
     * results to {@code rc-file.txt}, {@code rc-numIssued.txt} and
     * {@code rc-loginFailed.txt}.
     *
     * @param args optional; {@code args[0]} overrides the output directory
     *             (default {@code /home/rish})
     */
    public static void main(String[] args) {
        String outputDir = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_DIR;

        ScrapeRC rc = new ScrapeRC();
        rc.setup();
        try {
            for (int i = 1; i <= 10; i++) {
                rc.login("201301" + String.format("%03d", i));
            }
            for (int i = 401; i <= 459; i++) {
                rc.login("201301" + i);
            }
        } finally {
            // quit() ends the whole WebDriver session; close() only closes the
            // current window and was skipped entirely when scraping threw.
            rc.driver.quit();
        }
        System.out.println("Fini");

        writeEntries(new File(outputDir, "rc-file.txt"), rc.allBooks);
        writeEntries(new File(outputDir, "rc-numIssued.txt"), rc.numIssuedBooks);
        writeEntries(new File(outputDir, "rc-loginFailed.txt"), rc.loginUnsuccessful);
    }

    /**
     * Appends every entry to {@code file}, one per line, each followed by a comma.
     * I/O failures are reported on stderr and do not abort the program, so the
     * remaining result files are still written. The writer is always closed.
     */
    private static void writeEntries(File file, Iterable<?> entries) {
        BufferedWriter writer = null;
        try {
            // append = true: repeated runs accumulate in the same file.
            writer = new BufferedWriter(new FileWriter(file, true));
            for (Object entry : entries) {
                writer.write(entry.toString() + ",\n");
            }
        } catch (IOException e) {
            System.err.println("Could not write " + file.getAbsolutePath());
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    System.err.println("Could not close " + file.getAbsolutePath());
                    e.printStackTrace();
                }
            }
        }
    }

    /** Starts the Firefox browser session used by all other methods. */
    public void setup() {
        driver = new FirefoxDriver();
    }

    /**
     * Logs in with {@code id} as both user name and password and, on success,
     * collects that user's record count via {@link #findBooks(String)}.
     *
     * <p>Login counts as successful when the browser ends up on the default
     * page. Ids that end up anywhere else are added to {@code loginUnsuccessful}.
     *
     * @param id the user id to try
     * @return {@code true} if the login succeeded and the books page was
     *         processed; {@code false} if the login was rejected or any
     *         exception occurred (the exception is printed, not rethrown)
     */
    public boolean login(String id) {
        try {
            driver.navigate().to(LOGIN_URL);
            WebElement userNameBox = driver.findElement(By.name("txtID"));
            WebElement passwordBox = driver.findElement(By.name("txtPW"));
            WebElement submitButton = driver.findElement(By.name("Action"));
            userNameBox.sendKeys(id);
            passwordBox.sendKeys(id);
            submitButton.click();

            if (!HOME_URL.equals(driver.getCurrentUrl())) {
                System.out.println("Unable to login " + id);
                loginUnsuccessful.add(id);
                return false;
            }
            System.out.println("Logged in " + id);
            getUserName();
            findBooks(id);
            return true;
        } catch (Exception e) {
            // Boundary catch: one broken page must not stop the whole run.
            // Name the id so the failure can be traced back to a user.
            System.err.println("Error while processing " + id);
            e.printStackTrace();
            return false;
        }
    }

    /** Placeholder; currently does nothing. The intended lookup is kept below. */
    public void getUserName() {
        // System.out.println(driver.findElement(By.xpath("//*[@id='tblTab']/tbody/tr[2]/td[1]/font")).getText());
    }

    /**
     * Opens the "my books" page for the logged-in user, requests the
     * "Detail Transaction" list and records the total hit count for {@code uid}
     * in {@code numIssuedBooks}. Does nothing if the result page does not
     * contain the hit-count marker.
     *
     * @param uid id of the user that is currently logged in
     * @throws NumberFormatException if the marker is present but the count
     *         that follows it cannot be parsed
     */
    public void findBooks(String uid) {
        driver.navigate().to(MY_BOOKS_URL);
        // 7250 is typed into the "txtnDays" field; presumably a look-back
        // window in days (roughly 20 years) - TODO confirm.
        driver.findElement(By.name("txtnDays")).sendKeys("7250");
        new Select(driver.findElement(By.name("MyList"))).selectByVisibleText("Detail Transaction");
        driver.findElement(By.name("DataAction")).click();

        // Fetch the page source once; every call is a round trip to the browser.
        String pageSource = driver.getPageSource();
        if (!pageSource.contains(HITS_MARKER)) {
            return;
        }
        int totalHits = parseTotalHits(pageSource);
        System.out.println("Total hits = " + totalHits);
        numIssuedBooks.add(new NumIssuedBook(uid, totalHits));
        // Per-book scraping is currently disabled:
        // parseAndSaveBooks(uid);
        // int currentPage = 1;
        // while (shouldGoNextPage(totalHits, currentPage)) {
        // parseAndSaveBooks(uid);
        // currentPage++;
        // driver.findElement(By.xpath("/html/body/font/table/tbody/tr/td[2]/table[2]/tbody/tr/td[1]/form/input[3]"))
        // .click();
        // }
    }

    /**
     * Extracts the integer between the hit-count marker and the next
     * {@code </b>} in {@code pageSource}, ignoring a {@code <b>} tag and
     * surrounding whitespace.
     *
     * @throws NumberFormatException if the marker or the closing tag is
     *         missing, or the text in between is not an integer
     */
    private static int parseTotalHits(String pageSource) {
        int markerAt = pageSource.indexOf(HITS_MARKER);
        if (markerAt < 0) {
            throw new NumberFormatException("Marker '" + HITS_MARKER + "' not found in page");
        }
        String afterMarker = pageSource.substring(markerAt + HITS_MARKER.length());
        int boldEnd = afterMarker.indexOf("</b>");
        if (boldEnd < 0) {
            throw new NumberFormatException("No </b> after '" + HITS_MARKER + "'");
        }
        String digits = afterMarker.substring(0, boldEnd).replace("<b>", "").trim();
        return Integer.parseInt(digits);
    }

    /**
     * Reads the text of the first result table on the current page and adds
     * one {@link BookIssue} per parseable row to {@code allBooks}. Rows that do
     * not have the expected shape are skipped; if the table is not present the
     * method does nothing.
     *
     * @param uid id of the user the rows belong to
     */
    public void parseAndSaveBooks(String uid) {
        // findElements is declared to return List; casting it to ArrayList
        // relied on an implementation detail of the driver.
        List<WebElement> tables = driver.findElements(By
                .xpath("/html/body/font/table/tbody/tr/td[2]/table[1]/tbody"));
        if (tables.isEmpty()) {
            return;
        }
        String[] lines = tables.get(0).getText().split("\\n");
        for (int i = FIRST_DATA_LINE; i < lines.length; i++) {
            BookIssue issue = parseBookLine(uid, lines[i]);
            if (issue != null) {
                allBooks.add(issue);
            }
        }
    }

    /**
     * Parses one space-separated table row. The second token is taken as the
     * date, the third as the accession number and the last as the type; the
     * title is the text between the accession number and the dot separator,
     * the author is the text after the separator without the trailing type.
     *
     * @return the parsed record, or {@code null} if the row has fewer than
     *         three tokens or lacks the title/author separator
     */
    private static BookIssue parseBookLine(String uid, String line) {
        String[] tokens = line.split(" ");
        int separatorAt = line.indexOf(TITLE_AUTHOR_SEPARATOR);
        if (tokens.length < 3 || separatorAt < 0) {
            return null;
        }
        String date = tokens[1];
        String accessionNo = tokens[2];
        String type = tokens[tokens.length - 1];

        int titleStart = line.indexOf(accessionNo) + accessionNo.length();
        if (titleStart > separatorAt) {
            return null;
        }
        String title = line.substring(titleStart, separatorAt);

        String author = line.substring(separatorAt + TITLE_AUTHOR_SEPARATOR.length()).trim();
        // Strip only the trailing type token. Replacing every occurrence of
        // the type inside the author text mangled names containing it.
        if (type.length() > 0 && author.endsWith(type)) {
            author = author.substring(0, author.length() - type.length());
        }
        return new BookIssue(uid, "", date.trim(), accessionNo.trim(), title.trim(), author.trim(),
                type.trim());
    }

    /**
     * Tells whether more records remain after {@code currentPage} pages of
     * 15 records each.
     *
     * @param totalHits   total number of records reported by the site
     * @param currentPage number of pages already processed (1-based)
     * @return {@code true} if {@code totalHits} exceeds {@code currentPage * 15}
     */
    public boolean shouldGoNextPage(int totalHits, int currentPage) {
        return totalHits > currentPage * PAGE_SIZE;
    }

    /**
     * Escapes {@code value} for use inside a double-quoted JSON string:
     * quotes, backslashes and control characters are replaced by their JSON
     * escape sequences. {@code null} is rendered as the text {@code null},
     * matching plain string concatenation.
     */
    private static String escapeJson(String value) {
        String text = String.valueOf(value);
        StringBuilder out = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
            }
        }
        return out.toString();
    }

    /** One issued-book record scraped for a user. */
    private static class BookIssue {
        final String userid, username, date, accessionno, title, author, type;

        public BookIssue(String uid, String username, String date, String accessionno, String title,
                String author, String type) {
            this.userid = uid;
            this.username = username;
            this.date = date;
            this.accessionno = accessionno;
            this.title = title;
            this.author = author;
            this.type = type;
        }

        /**
         * Renders the record as a JSON object. {@code userid} is written
         * unquoted (as a number); all other fields are quoted and escaped.
         */
        @Override
        public String toString() {
            return "{\"userid\":" + userid
                    + ", \"username\":\"" + escapeJson(username)
                    + "\", \"date\":\"" + escapeJson(date)
                    + "\", \"accessionno\":\"" + escapeJson(accessionno)
                    + "\", \"title\":\"" + escapeJson(title)
                    + "\", \"author\":\"" + escapeJson(author)
                    + "\", \"type\":\"" + escapeJson(type) + "\"}";
        }
    }

    /** Total number of records reported for one user id. */
    private static class NumIssuedBook {
        final String id;
        final int bookNumber;

        public NumIssuedBook(String id, int bookNumber) {
            this.id = id;
            this.bookNumber = bookNumber;
        }

        /** Renders the record as a JSON object; both values are written unquoted. */
        @Override
        public String toString() {
            return "{\"id\":" + id + ", \"bookNumber\":" + bookNumber + "}";
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment