Created
September 29, 2017 18:53
-
-
Save anirudh708/d2f9a2512f3f3291e9cdc689dda92105 to your computer and use it in GitHub Desktop.
Java code for listing all Telugu movies from 1940 through 2016
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Plain data holder for one film entry.
 *
 * <p>Fields are public and mutable because the scraper in this file assigns
 * them directly after construction. Every field starts out {@code null}.
 */
class Movie {
    /** Title of the film as it appears on the page. */
    public String movieName;
    /** Address of the film's article, or {@code null} when the title is not linked. */
    public String wikiLink;
    /** Release date; not populated by the code visible in this file. */
    public Date releaseDate;
    /** Genre; not populated by the code visible in this file. */
    public String genre;

    /** Returns a readable dump of all fields, useful when printing or logging entries. */
    @Override
    public String toString() {
        return "Movie{movieName=" + movieName
                + ", wikiLink=" + wikiLink
                + ", releaseDate=" + releaseDate
                + ", genre=" + genre + "}";
    }
}
public class MovieListing implements Runnable { | |
public static final String URL = "https://en.wikipedia.org/"; | |
public static final String LANG = "wiki/List_of_Telugu_films_of_"; | |
public void run(){ | |
try { | |
for(int year=1940; year <= 2016; year++){ | |
System.out.println(year+" --"); | |
Document d = Jsoup.connect(URL+LANG+year).get(); | |
Elements all_tables = d.getElementsByClass("wikitable"); | |
for(Element tableEle:all_tables){ | |
Elements caption = tableEle.getElementsByTag("caption"); | |
if(caption.size() > 0){ | |
continue; | |
} | |
Elements allMovies = tableEle.getElementsByTag("i"); | |
for(Element movieEle: allMovies){ | |
Movie movObj = new Movie(); | |
if(movieEle.children().size() == 0){ | |
//System.out.println(movieEle.text()); | |
movObj.movieName = movieEle.text(); | |
} | |
else{ | |
Element movielink = movieEle.child(0); | |
movObj.movieName = movielink.text(); | |
movObj.wikiLink = URL+movielink.attr("href"); | |
} | |
} | |
} | |
} | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void main (String[] args){ | |
(new Thread(new MovieListing())).start(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment