Skip to content

Instantly share code, notes, and snippets.

@AL1L
Last active May 24, 2017 01:09
Show Gist options
  • Save AL1L/896eabc697a02df4a9f6ca7959228193 to your computer and use it in GitHub Desktop.
Save AL1L/896eabc697a02df4a9f6ca7959228193 to your computer and use it in GitHub Desktop.
package com.al1l.d3scrape;
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import com.mashape.unirest.request.GetRequest;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import me.diamonddev.craftoblo.items.ItemQuality;
import org.json.JSONArray;
import org.json.JSONObject;
/**
 * Command-line scraper that downloads Diablo III item listing pages from battle.net and prints
 * the items found on them (name, quality, required level and, when present, armor range) as a
 * single JSON document on standard output.
 *
 * <p>Pages are picked apart with literal string splitting rather than an HTML parser. Every
 * lookup is therefore bounds-checked: a page or table row that does not have the expected markup
 * is reported on standard error and skipped instead of aborting the whole run.
 */
public class Main {
    /**
     * When {@code true}, JSON is printed while scraping: per item if {@link #OUTPUT_HUMAN} is
     * also {@code true}, otherwise per section.
     */
    public static final boolean OUTPUT_JSON = false;
    /**
     * Indent factor used for the optional JSON output while scraping; {@code 0} prints compact
     * JSON and a negative value prints nothing.
     */
    public static final int FORMAT_JSON = 0;
    /** When {@code true}, a human-readable trace of every parsed item is printed. */
    public static final boolean OUTPUT_HUMAN = false;

    /** Markup that opens the details cell of an item row. */
    private static final String DETAILS_MARKER = "<div class=\"item-details-text\">";
    /** Start of the anchor tag that carries the item's name and quality color class. */
    private static final String ITEM_LINK_MARKER = "<a href=\"/d3/en/";
    /** Markup that opens the armor value list of an item. */
    private static final String ARMOR_LIST_MARKER = "<ul class=\"item-armor-weapon item-armor-armor\">";
    /** Separator between the minimum and maximum of an armor range. */
    private static final String RANGE_SEPARATOR = "–";

    /**
     * Scrapes every configured item page and prints the combined result as indented JSON.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        String[] urls = {
            "https://us.battle.net/d3/en/item/helm/",
            "https://us.battle.net/d3/en/item/spirit-stone/",
            "https://us.battle.net/d3/en/item/voodoo-mask/",
            "https://us.battle.net/d3/en/item/wizard-hat/",
            "https://us.battle.net/d3/en/item/pauldrons/",
            "https://us.battle.net/d3/en/item/chest-armor/",
            "https://us.battle.net/d3/en/item/cloak/",
            "https://us.battle.net/d3/en/item/bracers/",
            "https://us.battle.net/d3/en/item/gloves/",
            "https://us.battle.net/d3/en/item/belt/",
            "https://us.battle.net/d3/en/item/mighty-belt/",
            "https://us.battle.net/d3/en/item/pants/",
            "https://us.battle.net/d3/en/item/boots/",
            "https://us.battle.net/d3/en/item/amulet/",
            "https://us.battle.net/d3/en/item/ring/",
            "https://us.battle.net/d3/en/item/shield/",
            "https://us.battle.net/d3/en/item/crusader-shield/",
            "https://us.battle.net/d3/en/item/mojo/",
            "https://us.battle.net/d3/en/item/orb/",
            "https://us.battle.net/d3/en/item/quiver/"
        };
        String[] sections = {
            "Helms",
            "Spirit Stones",
            "Voodoo Masks",
            "Wizard Hats",
            "Pauldrons",
            "Chest Armor",
            "Cloaks",
            "Bracers",
            "Gloves",
            "Belts",
            "Mighty Belts",
            "Pants",
            "Boots",
            "Amulets",
            "Rings",
            "Shields",
            "Crusader Shields",
            "Mojos",
            "Orbs",
            "Quivers",
            "Enchantress Focuses",
            "Scoundrel Tokens",
            "Templar Relics",
            "Axes",
            "Daggers",
            "Maces",
            "Spears",
            "Swords",
            "Ceremonial Knives",
            "Fist Weapons",
            "Flails Cru",
            "Mighty Weapons",
            "Axes",
            "Maces",
            "Polearms",
            "Staves",
            "Swords",
            "Daibo",
            "Flails",
            "Mighty Weapons",
            "Bows",
            "Crossbows",
            "Hand Crossbows",
            "Wands",
            "Potions",
            "Crafting Materials",
            "Blacksmith Plans",
            "Jeweler Designs",
            "Pages of Training",
            "Dyes",
            "Gems",
            "Miscellaneous"
        };
        // urls[i] is labelled with sections[i]. The two arrays have different lengths, so only
        // indexes present in both are scraped; this keeps a future edit to either list from
        // running off the end of the other.
        int pageCount = Math.min(urls.length, sections.length);

        System.out.println("Starting scrape...");
        System.out.println("Progress: 0% (time: " + (System.currentTimeMillis() - start) + "ms)");
        List<JSONObject> jsonObjects = new ArrayList<JSONObject>();
        try {
            for (int i = 0; i < pageCount; i++) {
                jsonObjects.add(scrape(urls[i], sections[i]));
                // Integer arithmetic: exact, unlike truncating a double product.
                int percent = (i + 1) * 100 / pageCount;
                System.out.println("Progress: " + percent + "% (time: " + (System.currentTimeMillis() - start) + "ms)");
            }
        } finally {
            shutdownHttpClient();
        }

        Map<String, Object> out = new HashMap<String, Object>();
        out.put("items", new JSONArray(jsonObjects));
        System.out.println(new JSONObject(out).toString(2));
        System.out.println("\nTook " + (System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Downloads one item listing page and extracts every item row from its first table body.
     *
     * <p>Failures never propagate: if the page cannot be fetched or contains no
     * {@code <tbody>}, the problem is reported on standard error and the section is returned
     * empty. Rows that are not item rows, or whose item markup cannot be parsed, are skipped.
     *
     * @param url address of the listing page to fetch
     * @param section key under which the page's items are stored in the result
     * @return a JSON object with the single key {@code section} mapped to the array of items
     */
    public static JSONObject scrape(String url, String section) {
        GetRequest request = Unirest.get(url);
        List<JSONObject> jsonObjectList = new ArrayList<JSONObject>();
        try {
            HttpResponse<String> response = request.asString();
            String tableRows = piece(piece(response.getBody(), "<tbody>", 1), "</tbody>", 0);
            if (tableRows == null) {
                warn("No item table found at " + url + "; section \"" + section + "\" will be empty");
            } else {
                // "tr>" ends both <tr> and </tr>, so this cuts the table body at row boundaries.
                for (String row : tableRows.split("tr>")) {
                    Map<String, Object> itemMap = parseItem(row);
                    if (itemMap == null) {
                        continue;
                    }
                    JSONObject item = new JSONObject(itemMap);
                    if (OUTPUT_HUMAN && OUTPUT_JSON) {
                        System.out.println("Json: ");
                        printJson(item);
                    }
                    jsonObjectList.add(item);
                }
            }
        } catch (UnirestException e) {
            warn("Failed to fetch " + url + ": " + e.getMessage());
            e.printStackTrace();
        }

        Map<String, Object> out = new HashMap<String, Object>();
        out.put(section, new JSONArray(jsonObjectList));
        JSONObject result = new JSONObject(out);
        if (!OUTPUT_HUMAN && OUTPUT_JSON) {
            printJson(result);
        }
        return result;
    }

    /**
     * Parses one table-row fragment into an item map with the keys {@code name},
     * {@code quality}, {@code level} and, for armor pieces, {@code armor}.
     *
     * @param row HTML fragment of a single table row
     * @return the item map, or {@code null} if the fragment is not an item row or its markup
     *     could not be parsed (the latter is reported on standard error)
     */
    private static Map<String, Object> parseItem(String row) {
        String[] detailParts = row.split(Pattern.quote(DETAILS_MARKER));
        if (detailParts.length != 2) {
            return null; // header, separator or otherwise not an item row
        }
        String itemDetails = detailParts[1];
        String link = piece(itemDetails, ITEM_LINK_MARKER, 1);
        String name = piece(piece(link, ">", 1), "<", 0);
        String color = piece(piece(link, "class=\"", 1), "\">", 0);
        String lvlStr = piece(piece(row, "data-raw=\"", 1), "\"", 0);
        if (name == null || color == null || lvlStr == null) {
            warn("Skipping item row with unexpected markup (name=" + name + ")");
            return null;
        }

        name = name.replace("&#39;", "'");
        if (OUTPUT_HUMAN) {
            System.out.println("\n--- " + name + " ---");
        }
        ItemQuality quality = qualityFor(color.trim());
        if (OUTPUT_HUMAN) {
            System.out.println("Is " + quality);
            System.out.println("Req lvl " + lvlStr);
        }
        Integer level = parseIntOrNull(lvlStr);
        if (level == null) {
            warn("Skipping item \"" + name + "\": required level \"" + lvlStr + "\" is not a number");
            return null;
        }

        Map<String, Object> itemMap = new HashMap<String, Object>();
        if (itemDetails.contains("Armor</li>")) {
            Map<String, Object> armor = parseArmor(itemDetails);
            if (armor == null) {
                // Dropped rather than emitted without its armor, so the output never contains a
                // silently incomplete armor piece.
                warn("Skipping item \"" + name + "\": armor value could not be parsed");
                return null;
            }
            itemMap.put("armor", armor);
        }
        itemMap.put("name", name);
        itemMap.put("quality", quality.name());
        itemMap.put("level", level);
        return itemMap;
    }

    /**
     * Extracts the armor value of an item, which is either a single number or a
     * {@code min–max} range.
     *
     * @param itemDetails HTML of the item's details cell
     * @return a map with integer {@code min} and {@code max} entries (equal for a single value),
     *     or {@code null} if the armor markup is missing or not numeric
     */
    private static Map<String, Object> parseArmor(String itemDetails) {
        if (OUTPUT_HUMAN) {
            System.out.println("Is Armor");
        }
        String armorRange = piece(piece(piece(itemDetails, ARMOR_LIST_MARKER, 1), "class=\"value\">", 1), "<", 0);
        if (armorRange == null) {
            return null;
        }
        if (OUTPUT_HUMAN) {
            System.out.println("Armor Range: ");
        }
        String min = armorRange;
        String max = armorRange;
        if (armorRange.contains(RANGE_SEPARATOR)) {
            min = piece(armorRange, RANGE_SEPARATOR, 0);
            max = piece(armorRange, RANGE_SEPARATOR, 1);
        }
        if (OUTPUT_HUMAN) {
            System.out.println(" Min: " + min);
            System.out.println(" Max: " + max);
        }
        Integer minValue = parseIntOrNull(min);
        Integer maxValue = parseIntOrNull(max);
        if (minValue == null || maxValue == null) {
            return null;
        }
        Map<String, Object> armor = new HashMap<String, Object>();
        armor.put("min", minValue);
        armor.put("max", maxValue);
        return armor;
    }

    /**
     * Maps the CSS color class of an item link to its quality.
     *
     * @param cssClass trimmed value of the link's {@code class} attribute
     * @return the matching quality; {@link ItemQuality#COMMON} for any unrecognized class
     */
    private static ItemQuality qualityFor(String cssClass) {
        if (cssClass.equals("d3-color-orange")) {
            return ItemQuality.LEGENDARY;
        }
        if (cssClass.equals("d3-color-yellow")) {
            return ItemQuality.RARE;
        }
        if (cssClass.equals("d3-color-blue")) {
            return ItemQuality.MAGIC;
        }
        if (cssClass.equals("d3-color-green")) {
            return ItemQuality.SET;
        }
        return ItemQuality.COMMON; // "d3-color-default" and anything unknown
    }

    /**
     * Splits {@code text} on the literal {@code delimiter} (with {@link String#split(String)}
     * semantics) and returns the piece at {@code index}.
     *
     * <p>A {@code null} input yields {@code null}, so calls can be nested without intermediate
     * checks.
     *
     * @param text text to split, may be {@code null}
     * @param delimiter literal delimiter; it is not interpreted as a regular expression
     * @param index zero-based index of the wanted piece
     * @return the requested piece, or {@code null} if {@code text} is {@code null} or has fewer
     *     pieces than {@code index + 1}
     */
    private static String piece(String text, String delimiter, int index) {
        if (text == null) {
            return null;
        }
        String[] pieces = text.split(Pattern.quote(delimiter));
        return index < pieces.length ? pieces[index] : null;
    }

    /**
     * Parses a decimal integer, ignoring surrounding whitespace.
     *
     * @param text text to parse, may be {@code null}
     * @return the parsed value, or {@code null} if {@code text} is {@code null} or not an integer
     */
    private static Integer parseIntOrNull(String text) {
        if (text == null) {
            return null;
        }
        try {
            return Integer.valueOf(Integer.parseInt(text.trim()));
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /** Prints {@code json} to standard output using the {@link #FORMAT_JSON} indent setting. */
    private static void printJson(JSONObject json) {
        if (FORMAT_JSON > 0) {
            System.out.println(json.toString(FORMAT_JSON));
        } else if (FORMAT_JSON == 0) {
            System.out.println(json.toString());
        }
    }

    /** Reports a non-fatal problem on standard error, keeping standard output for results. */
    private static void warn(String message) {
        System.err.println(message);
    }

    /**
     * Shuts down Unirest's shared HTTP clients. Unirest's documentation asks applications to
     * call {@code Unirest.shutdown()} before exiting so that its background threads are stopped.
     */
    private static void shutdownHttpClient() {
        try {
            Unirest.shutdown();
        } catch (IOException e) {
            warn("Failed to shut down the HTTP client: " + e.getMessage());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment