snoop2head · January 2, 2020 16:16
diff --git a/gistfile1.txt b/gistfile1.txt
 import numpy as np
 import pandas as pd
 import requests
 from bs4 import BeautifulSoup
 from selenium import webdriver
 import csv
 from urllib.parse import urlparse

 def crwl_as_csv(
     univ_query,
     output_dir=r'C:/Users/pc/Documents/GitHub/OIA_Text_Wrangling/dataf/',
     timeout=30,
 ):
     """Crawl the paginated expReport.asp listing for one ``ucode`` and save it as CSV.

     Pages are requested from ``https://oia.yonsei.ac.kr/partner/expReport.asp``
     starting at ``page=1``. The first ``<table>`` of each page is parsed with
     ``pandas.read_html`` (first row used as header) and an ``href`` column is
     added from the table's ``<a>`` tags. Crawling stops at the first page that
     has no table, no data rows, or that repeats the previous page.

     Args:
         univ_query: Value sent as the ``ucode`` query parameter; also used
             (via ``str()``) as the CSV file name.
         output_dir: Directory the ``<univ_query>.csv`` file is written to.
             Defaults to the path the original script used.
         timeout: Seconds to wait for each HTTP response.

     Returns:
         The concatenated DataFrame after ``reset_index()``; the per-page row
         position is therefore kept as an ``index`` column, as before.

     Raises:
         requests.HTTPError: If a page answers with an error status.
         ValueError: If the number of links in a page's table does not match
             its number of rows, so links cannot be aligned with rows.
     """
     # Function-scope imports: only stdlib, needed by this block alone.
     import os
     from io import StringIO

     base_url = "https://oia.yonsei.ac.kr/partner/expReport.asp"
     frames = []
     previous = None
     page = 1
     while True:
         # Let requests build and encode the query string.
         params = {"page": page, "cur_pack": 0, "ucode": univ_query, "bgbn": "A"}
         res = requests.get(base_url, params=params, timeout=timeout)
         res.raise_for_status()
         soup = BeautifulSoup(res.content, 'lxml')
         table = soup.find('table')
         if table is None:
             break
         try:
             # StringIO: passing literal HTML to read_html is deprecated.
             tables = pd.read_html(StringIO(str(table)), encoding='utf-8', header=0)
         except ValueError:
             # read_html raises ValueError when it finds no parseable table.
             break
         if not tables or tables[0].empty:
             break
         df_crawl = tables[0]
         # Check emptiness before touching links, so a trailing page with
         # stray anchors but no rows ends the crawl instead of raising.
         hrefs = [tag.get('href', "no link") for tag in table.find_all('a')]
         if len(hrefs) != len(df_crawl):
             raise ValueError(
                 "page %d: found %d links for %d table rows"
                 % (page, len(hrefs), len(df_crawl))
             )
         df_crawl['href'] = hrefs
         # Guard against an endless loop in case the server keeps returning
         # the same page for out-of-range page numbers.
         if previous is not None and df_crawl.equals(previous):
             break
         frames.append(df_crawl)
         previous = df_crawl
         page += 1
     # Concatenate once instead of re-copying the accumulated frame per page.
     df = pd.concat(frames, sort=False) if frames else pd.DataFrame()
     print(df)
     df_without_index = df.reset_index()
     print(df_without_index)
     # str() so a numeric univ_query works for the file name as it does for the URL.
     csv_path = os.path.join(output_dir, str(univ_query) + '.csv')
     df_without_index.to_csv(csv_path, index=False, encoding="utf-8")
     return df_without_index
	import numpy as np
	import pandas as pd
	import requests
	from bs4 import BeautifulSoup
	from selenium import webdriver
	import csv
	from urllib.parse import urlparse

	def crwl_as_csv(
	    univ_query,
	    output_dir=r'C:/Users/pc/Documents/GitHub/OIA_Text_Wrangling/dataf/',
	    timeout=30,
	):
	    """Crawl the paginated expReport.asp listing for one ``ucode`` and save it as CSV.

	    Pages are requested from ``https://oia.yonsei.ac.kr/partner/expReport.asp``
	    starting at ``page=1``. The first ``<table>`` of each page is parsed with
	    ``pandas.read_html`` (first row used as header) and an ``href`` column is
	    added from the table's ``<a>`` tags. Crawling stops at the first page that
	    has no table, no data rows, or that repeats the previous page.

	    Args:
	        univ_query: Value sent as the ``ucode`` query parameter; also used
	            (via ``str()``) as the CSV file name.
	        output_dir: Directory the ``<univ_query>.csv`` file is written to.
	            Defaults to the path the original script used.
	        timeout: Seconds to wait for each HTTP response.

	    Returns:
	        The concatenated DataFrame after ``reset_index()``; the per-page row
	        position is therefore kept as an ``index`` column, as before.

	    Raises:
	        requests.HTTPError: If a page answers with an error status.
	        ValueError: If the number of links in a page's table does not match
	            its number of rows, so links cannot be aligned with rows.
	    """
	    # Function-scope imports: only stdlib, needed by this block alone.
	    import os
	    from io import StringIO

	    base_url = "https://oia.yonsei.ac.kr/partner/expReport.asp"
	    frames = []
	    previous = None
	    page = 1
	    while True:
	        # Let requests build and encode the query string.
	        params = {"page": page, "cur_pack": 0, "ucode": univ_query, "bgbn": "A"}
	        res = requests.get(base_url, params=params, timeout=timeout)
	        res.raise_for_status()
	        soup = BeautifulSoup(res.content, 'lxml')
	        table = soup.find('table')
	        if table is None:
	            break
	        try:
	            # StringIO: passing literal HTML to read_html is deprecated.
	            tables = pd.read_html(StringIO(str(table)), encoding='utf-8', header=0)
	        except ValueError:
	            # read_html raises ValueError when it finds no parseable table.
	            break
	        if not tables or tables[0].empty:
	            break
	        df_crawl = tables[0]
	        # Check emptiness before touching links, so a trailing page with
	        # stray anchors but no rows ends the crawl instead of raising.
	        hrefs = [tag.get('href', "no link") for tag in table.find_all('a')]
	        if len(hrefs) != len(df_crawl):
	            raise ValueError(
	                "page %d: found %d links for %d table rows"
	                % (page, len(hrefs), len(df_crawl))
	            )
	        df_crawl['href'] = hrefs
	        # Guard against an endless loop in case the server keeps returning
	        # the same page for out-of-range page numbers.
	        if previous is not None and df_crawl.equals(previous):
	            break
	        frames.append(df_crawl)
	        previous = df_crawl
	        page += 1
	    # Concatenate once instead of re-copying the accumulated frame per page.
	    df = pd.concat(frames, sort=False) if frames else pd.DataFrame()
	    print(df)
	    df_without_index = df.reset_index()
	    print(df_without_index)
	    # str() so a numeric univ_query works for the file name as it does for the URL.
	    csv_path = os.path.join(output_dir, str(univ_query) + '.csv')
	    df_without_index.to_csv(csv_path, index=False, encoding="utf-8")
	    return df_without_index