rkreddyp · February 15, 2020 19:07
diff --git a/parse_debian_tracker_give_json.py b/parse_debian_tracker_give_json.py
 def parse_debian_tracker_give_json(cve):
     """Scrape the Debian security tracker page for ``cve`` into a flat dict.

     The page at ``https://security-tracker.debian.org/tracker/<cve>`` is
     fetched and every ``<table>`` in it is parsed with
     ``scrape_helpers.parse_html_table`` (used here as if it returns a pandas
     DataFrame). Only the first row of each table is kept, any 'Debian Bugs'
     column is dropped, and the rows are joined side by side into one record.
     When several tables share a column name, the first table that actually
     has that column wins.

     Args:
         cve: CVE identifier appended verbatim to the tracker URL.

     Returns:
         A dict with one key per scraped column. The keys 'Package', 'Type',
         'Fixed Version', 'Urgency', 'Origin', 'NVD severity' and 'References'
         are always present (``None`` when no table supplied them). The extra
         key 'notes' holds the text of the last ``<span>`` on the page, or
         ``None`` when the page has no ``<span>``.

     Raises:
         ValueError: if the page contains no ``<table>`` element.
         IndexError: if every parsed table is empty, so there is no row to
             return.
     """
     required_columns = (
         'Package', 'Type', 'Fixed Version', 'Urgency', 'Origin',
         'NVD severity', 'References',
     )

     url = 'https://security-tracker.debian.org/tracker/' + cve
     # Without a timeout a stalled connection would block the caller forever.
     page = requests.get(url, timeout=30)
     soup = BeautifulSoup(page.text, 'html.parser')

     first_rows = []
     for table in soup.find_all('table'):
         frame = scrape_helpers.parse_html_table(table)
         # Keep only the first data row, renumbered from 0 so that the rows of
         # all tables line up when they are concatenated column-wise.
         frame = frame.iloc[0:1].reset_index(drop=True)
         frame = frame.drop(columns=['Debian Bugs'], errors='ignore')
         first_rows.append(frame)

     if not first_rows:
         # Same exception type pd.concat would raise on an empty list, but
         # with a message that points at the actual cause.
         raise ValueError('no <table> elements found at ' + url)

     merged = pd.concat(first_rows, axis=1)
     merged = merged.loc[:, ~merged.columns.duplicated()]

     # Pad only after de-duplicating: padding every table up front would let
     # an earlier table's placeholder NaN shadow a later table's real value.
     missing = [col for col in required_columns if col not in merged.columns]
     merged = merged.reindex(columns=list(merged.columns) + missing)

     cve_json = json.loads(merged.to_json(orient='records'))[0]

     spans = soup.find_all('span')
     cve_json['notes'] = spans[-1].text if spans else None
     return cve_json
	def parse_debian_tracker_give_json(cve):
		"""Scrape the Debian security tracker page for ``cve`` into a flat dict.

		The page at ``https://security-tracker.debian.org/tracker/<cve>`` is
		fetched and every ``<table>`` in it is parsed with
		``scrape_helpers.parse_html_table`` (used here as if it returns a pandas
		DataFrame). Only the first row of each table is kept, any 'Debian Bugs'
		column is dropped, and the rows are joined side by side into one record.
		When several tables share a column name, the first table that actually
		has that column wins.

		Args:
			cve: CVE identifier appended verbatim to the tracker URL.

		Returns:
			A dict with one key per scraped column. The keys 'Package', 'Type',
			'Fixed Version', 'Urgency', 'Origin', 'NVD severity' and 'References'
			are always present (``None`` when no table supplied them). The extra
			key 'notes' holds the text of the last ``<span>`` on the page, or
			``None`` when the page has no ``<span>``.

		Raises:
			ValueError: if the page contains no ``<table>`` element.
			IndexError: if every parsed table is empty, so there is no row to
				return.
		"""
		required_columns = (
			'Package', 'Type', 'Fixed Version', 'Urgency', 'Origin',
			'NVD severity', 'References',
		)

		url = 'https://security-tracker.debian.org/tracker/' + cve
		# Without a timeout a stalled connection would block the caller forever.
		page = requests.get(url, timeout=30)
		soup = BeautifulSoup(page.text, 'html.parser')

		first_rows = []
		for table in soup.find_all('table'):
			frame = scrape_helpers.parse_html_table(table)
			# Keep only the first data row, renumbered from 0 so that the rows of
			# all tables line up when they are concatenated column-wise.
			frame = frame.iloc[0:1].reset_index(drop=True)
			frame = frame.drop(columns=['Debian Bugs'], errors='ignore')
			first_rows.append(frame)

		if not first_rows:
			# Same exception type pd.concat would raise on an empty list, but
			# with a message that points at the actual cause.
			raise ValueError('no <table> elements found at ' + url)

		merged = pd.concat(first_rows, axis=1)
		merged = merged.loc[:, ~merged.columns.duplicated()]

		# Pad only after de-duplicating: padding every table up front would let
		# an earlier table's placeholder NaN shadow a later table's real value.
		missing = [col for col in required_columns if col not in merged.columns]
		merged = merged.reindex(columns=list(merged.columns) + missing)

		cve_json = json.loads(merged.to_json(orient='records'))[0]

		spans = soup.find_all('span')
		cve_json['notes'] = spans[-1].text if spans else None
		return cve_json