Skip to content

Instantly share code, notes, and snippets.

View Abhayparashar31's full-sized avatar
:octocat:
Learning New Things

Abhay Parashar Abhayparashar31

:octocat:
Learning New Things
View GitHub Profile
import gensim
import re
from gensim.summarization.summarizer import summarize
import requests
from bs4 import BeautifulSoup
# Page whose text is fetched and parsed below.
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
# Download the page over HTTP; the response body is read via `res.text`.
res = requests.get(url)
# Parse the HTML with BeautifulSoup using the 'html.parser' backend.
soup = BeautifulSoup(res.text,'html.parser')
def get_key(val, scores=None):
    """Return the first key whose value equals ``val``.

    Args:
        val: The value to look for.
        scores: Mapping to search. When omitted, the module-level
            ``sentences_score`` dict is used, which keeps existing
            one-argument calls working unchanged.

    Returns:
        The first key (in the mapping's iteration order) whose value
        compares equal to ``val``, or ``None`` when nothing matches.
    """
    if scores is None:
        # Fall back to the global so legacy callers need no changes.
        scores = sentences_score
    return next((key for key, value in scores.items() if val == value), None)
import heapq

# Number of top-scoring sentences kept in the summary.
n = 3

# Sentence holding the single highest score.
key = get_key(max(sentences_score.values()))
# Pick the n sentences with the largest scores and print them as one string.
summary = heapq.nlargest(n, sentences_score, key=sentences_score.get)
print(" ".join(summary))
# Score each sentence by summing the `word_frequency` values of its tokens.
sentences_score = {}
for sentence in sent_tokens:
    # Sentences with 30 or more space-separated pieces are never scored.
    # The check does not depend on the word, so do it once per sentence.
    if len(sentence.split(" ")) >= 30:
        continue
    for word in word_tokenize(sentence):
        # Tokens without an entry in word_frequency contribute nothing.
        if word in word_frequency:
            sentences_score[sentence] = (
                sentences_score.get(sentence, 0) + word_frequency[word]
            )
import nltk
# Aliased so the corpus reader is not shadowed by the `stopwords` set below.
from nltk.corpus import stopwords as nltk_stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

# Download the NLTK resources the tokenizers and stop-word list rely on.
nltk.download('punkt')
nltk.download('stopwords')

# Split the cleaned text into sentences and into individual word tokens.
sent_tokens = sent_tokenize(cleaned_data)
word_tokens = word_tokenize(cleaned_data)
word_frequency = {}
# A set gives O(1) membership tests; the public name `stopwords` is kept.
stopwords = set(nltk_stopwords.words("english"))
import re
def clean_data(data):
    """Normalize raw text for tokenization.

    Bracketed numeric markers such as ``[12]`` (and empty ``[]``) and commas
    are replaced by spaces, the text is lower-cased, and every run of
    whitespace is collapsed to a single space.

    Args:
        data: The raw text to clean.

    Returns:
        The cleaned, lower-cased text with no consecutive whitespace.
    """
    text = re.sub(r"\[[0-9]*\]", " ", data)
    text = text.lower()
    # Replace commas BEFORE collapsing whitespace; doing it afterwards
    # re-introduces double spaces ("a, b" -> "a  b"), which inflates any
    # later word count based on split(" ").
    text = re.sub(r",", " ", text)
    text = re.sub(r"\s+", " ", text)
    return text
# Normalize the raw text with clean_data().
# NOTE(review): `raw_data` is not defined in the visible code — presumably the
# text returned by the scraper; confirm where it is assigned.
cleaned_data = clean_data(raw_data)
from bs4 import BeautifulSoup
import requests
def scrape_con(url):
    """Fetch a web page and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to download.

    Returns:
        The text of every ``<p>`` element, concatenated in document order
        with no separator (an empty string when the page has none).
    """
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')
    # The original built `data` but never returned it, so callers got None.
    # str.join also avoids repeated string concatenation in a loop.
    return "".join(paragraph.text for paragraph in soup.find_all("p"))
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV
# numpy must be in scope before the search space is built; the file only
# imports it further down.
import numpy as np

# Hyper-parameter search space: each key maps to the candidate values.
params = {
    'max_depth': [3, 6, 8, 10, 12],
    'learning_rate': [0.01, 0.1, 0.2, 0.001, 0.3],
    # np.arange(0.3, 1.0, 0.1) yields 0.3, 0.4, ..., 0.9 (1.0 is excluded).
    'colsample_bytree': np.arange(0.3, 1.0, 0.1),
    'colsample_bylevel': np.arange(0.3, 1.0, 0.1),
    'subsample': np.arange(0.3, 1.0, 0.1),
    'n_estimators': [100, 150, 200, 250, 300],
}
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
## Base Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
# (name, estimator) pairs for the base models.
# The list is now closed with `]`; the original ended with a stray `)`,
# which is a syntax error.
base_learners = [
    ('l1', KNeighborsClassifier()),
    ('l2', DecisionTreeClassifier()),
    ('l3', SVC(gamma=2, C=1)),
]
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
# Depth-2 decision tree used as the weak learner.
dt = DecisionTreeClassifier(max_depth=2, random_state=0)
# The weak learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# later removed, while the first positional slot works on both.
adc = AdaBoostClassifier(dt, n_estimators=7, learning_rate=0.1, random_state=0)
# NOTE(review): x_train / y_train are not defined in the visible code —
# confirm they are created (e.g. by a train/test split) before this runs.
adc.fit(x_train, y_train)