This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import gensim
import requests
from bs4 import BeautifulSoup
# NOTE(review): gensim.summarization was removed in gensim 4.0, so this import
# needs gensim < 4 — confirm the pinned version.
from gensim.summarization.summarizer import summarize

# Article whose text is going to be summarized.
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'

# Without a timeout, requests waits forever on an unresponsive server.
res = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as the article.
res.raise_for_status()

# Parse the downloaded HTML with the standard-library parser backend.
soup = BeautifulSoup(res.text, 'html.parser')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_key(val, scores=None):
    """Return the first key whose value equals ``val``.

    Args:
        val: Value to search for.
        scores: Mapping to search. When omitted, the module-level
            ``sentences_score`` dictionary is used, which keeps the original
            single-argument call style working.

    Returns:
        The first key (in the mapping's iteration order) whose value equals
        ``val``, or ``None`` when no value matches.
    """
    if scores is None:
        scores = sentences_score
    return next((key for key, value in scores.items() if val == value), None)
import heapq

# Number of top-scoring sentences kept in the summary.
n = 3

# Sentence carrying the highest score (get_key returns the first match on ties).
key = get_key(max(sentences_score.values()))

# The n best sentences, ordered from highest to lowest score.
summary = heapq.nlargest(n, sentences_score, key=sentences_score.get)
print(" ".join(summary))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Map each sentence to the summed frequency of the words it contains.
sentences_score = {}
for sentence in sent_tokens:
    # Sentences with 30 or more space-separated tokens are never scored; the
    # check does not depend on the word, so it is done once per sentence.
    if len(sentence.split(" ")) >= 30:
        continue
    for word in word_tokenize(sentence):
        # Words without a recorded frequency contribute nothing.
        if word in word_frequency:
            sentences_score[sentence] = (
                sentences_score.get(sentence, 0) + word_frequency[word]
            )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.corpus import stopwords as nltk_stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

# Download the tokenizer models and the stop-word lists used below.
nltk.download('punkt')
nltk.download('stopwords')

# Sentence-level and word-level views of the cleaned article text.
sent_tokens = sent_tokenize(cleaned_data)
word_tokens = word_tokenize(cleaned_data)

# Filled in later; starts out empty here.
word_frequency = {}

# The corpus reader is imported under an alias so that the name `stopwords`
# can hold the resulting set without referring to itself before assignment.
stopwords = set(nltk_stopwords.words("english"))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re


def clean_data(data):
    """Normalize raw text before tokenization.

    The text is processed in this order:

    1. Bracketed numeric markers such as ``[12]`` (and empty ``[]``) are
       replaced by a space.
    2. Everything is lower-cased.
    3. Commas are replaced by a space.
    4. Every run of whitespace is collapsed to a single space.

    Commas are handled before whitespace is collapsed so that the result
    never contains consecutive spaces.

    Args:
        data: The raw text.

    Returns:
        The normalized text.
    """
    text = re.sub(r"\[[0-9]*\]", " ", data)
    text = text.lower()
    text = text.replace(",", " ")
    text = re.sub(r"\s+", " ", text)
    return text
# Normalized version of the raw text.
cleaned_data = clean_data(raw_data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup
import requests


def scrape_con(url):
    """Download a web page and return the text of its paragraphs.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text of every ``<p>`` element on the page, concatenated in
        document order with no separator.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests waits forever on an unresponsive server.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    # find_all is the current spelling of the legacy findAll alias.
    content = soup.find_all("p")
    # The accumulated text used to be built and then dropped; return it.
    return "".join(paragraph.text for paragraph in content)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

# Candidate hyper-parameter values.
# NOTE(review): presumably passed to RandomizedSearchCV as the parameter
# distributions for an XGBoost model — the call is not visible here.
params = {
    'max_depth': [3, 6, 8, 10, 12],
    'learning_rate': [0.01, 0.1, 0.2, 0.001, 0.3],
    # Fractions starting at 0.3 in steps of 0.1, stopping before 1.0.
    'colsample_bytree': np.arange(0.3, 1.0, 0.1),
    'colsample_bylevel': np.arange(0.3, 1.0, 0.1),
    'subsample': np.arange(0.3, 1.0, 0.1),
    'n_estimators': [100, 150, 200, 250, 300],
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Base models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier

# (name, estimator) pairs, one per base model.
# NOTE(review): presumably handed to StackingClassifier as its `estimators`
# argument — the construction of the stack is not visible here.
base_learners = [
    ('l1', KNeighborsClassifier()),
    ('l2', DecisionTreeClassifier()),
    ('l3', SVC(gamma=2, C=1)),
]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# Depth-2 decision tree used as the weak learner that AdaBoost boosts.
dt = DecisionTreeClassifier(max_depth=2, random_state=0)

# `estimator` replaces `base_estimator`, which scikit-learn renamed in 1.2 and
# removed in 1.4.
adc = AdaBoostClassifier(
    estimator=dt,
    n_estimators=7,
    learning_rate=0.1,
    random_state=0,
)
adc.fit(x_train, y_train)