# Fetch the Wikipedia article that will be summarized.
# Note: gensim.summarization was removed in gensim 4.0, so this import needs gensim < 4.0.
import re
import requests
from bs4 import BeautifulSoup
from gensim.summarization.summarizer import summarize

url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
res = requests.get(url)
soup = BeautifulSoup(res.text, 'html.parser')
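The snippet above imports gensim's summarize but never calls it. A minimal sketch of how it could be applied to the scraped paragraphs, assuming gensim < 4.0; the ratio value is an illustrative choice, not from the original:

# Sketch only: join the article's <p> text and run gensim's extractive summarizer.
article_text = " ".join(p.get_text() for p in soup.find_all("p"))
print(summarize(article_text, ratio=0.05))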
# Pick the top-scoring sentences as the frequency-based summary.
import heapq

def get_key(val):
    # Return the sentence whose score equals val (used to look up the single best sentence).
    for key, value in sentences_score.items():
        if val == value:
            return key

key = get_key(max(sentences_score.values()))  # highest-scoring sentence
n = 3  # number of sentences to keep in the summary
summary = heapq.nlargest(n, sentences_score, key=sentences_score.get)
print(" ".join(summary))
# Score each sentence by summing the frequencies of its words
# (only sentences shorter than 30 words are considered).
sentences_score = {}
for sentence in sent_tokens:
    for word in word_tokenize(sentence):
        if word in word_frequency:
            if len(sentence.split(" ")) < 30:
                if sentence not in sentences_score:
                    sentences_score[sentence] = word_frequency[word]
                else:
                    sentences_score[sentence] += word_frequency[word]
# Tokenize the cleaned article into sentences and words, and load the English stop-word list.
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

sent_tokens = sent_tokenize(cleaned_data)   # cleaned_data comes from the clean_data() step
word_tokens = word_tokenize(cleaned_data)
word_frequency = {}
stop_words = set(stopwords.words("english"))
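The scoring loop reads from word_frequency, but the step that fills it does not appear in these snippets. A minimal sketch of that counting step, assuming plain counts over alphabetic, non-stop-word tokens:

# Sketch of the missing frequency-counting step (assumed, not from the original snippets).
for word in word_tokens:
    if word.isalpha() and word not in stop_words:
        word_frequency[word] = word_frequency.get(word, 0) + 1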
# Clean the scraped text: drop citation markers like [1], lowercase,
# collapse whitespace, and strip commas.
import re

def clean_data(data):
    text = re.sub(r"\[[0-9]*\]", " ", data)   # remove Wikipedia citation markers such as [12]
    text = text.lower()
    text = re.sub(r"\s+", " ", text)          # collapse runs of whitespace
    text = re.sub(r",", " ", text)            # replace commas with spaces
    return text

cleaned_data = clean_data(raw_data)           # raw_data is produced by scrape_con()
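A quick illustration of what clean_data does, on a made-up sentence (the input string here is purely illustrative):

# Illustrative input only: the citation marker is dropped, the text is lowercased,
# whitespace is collapsed, and the comma is replaced with a space.
sample = "Python was conceived in the late 1980s,[1] as a successor to ABC."
print(clean_data(sample))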
# Scrape all paragraph text from a web page.
from bs4 import BeautifulSoup
import requests

def scrape_con(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')
    content = soup.findAll("p")      # every <p> element on the page
    data = ""
    for text in content:
        data += text.text
    return data

raw_data = scrape_con('https://en.wikipedia.org/wiki/Python_(programming_language)')
# Hyperparameter search space for an XGBoost model.
import numpy as np
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

params = {
    'max_depth': [3, 6, 8, 10, 12],
    'learning_rate': [0.01, 0.1, 0.2, 0.001, 0.3],
    'colsample_bytree': np.arange(0.3, 1.0, 0.1),
    'colsample_bylevel': np.arange(0.3, 1.0, 0.1),
    'subsample': np.arange(0.3, 1.0, 0.1),
    'n_estimators': [100, 150, 200, 250, 300],
}
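The search space above is defined but never used in these snippets. A minimal sketch of feeding it to RandomizedSearchCV; n_iter, cv, and scoring are illustrative choices, and x_train/y_train are assumed to come from a train/test split like the one sketched after the ensemble imports below:

# Sketch only: randomized hyperparameter search over the space above.
xgb_clf = xgb.XGBClassifier()
search = RandomizedSearchCV(
    estimator=xgb_clf,
    param_distributions=params,
    n_iter=25,           # illustrative value
    scoring='accuracy',  # illustrative choice
    cv=5,
    random_state=0,
)
search.fit(x_train, y_train)
print(search.best_params_)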
# Imports for the ensemble experiments: a synthetic dataset, a train/test split,
# an accuracy metric, and the base models.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
## Base Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
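make_classification and train_test_split are imported above, but the data itself is never built in these snippets. A minimal sketch of that step; the dataset sizes and random_state are assumptions:

# Sketch: synthetic classification data and the train/test split the later snippets assume.
X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)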
# Base learners for a stacking ensemble.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier

base_learners = [
    ('l1', KNeighborsClassifier()),
    ('l2', DecisionTreeClassifier()),
    ('l3', SVC(gamma=2, C=1)),
]
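StackingClassifier and LogisticRegression are imported above but never combined with the base learners. A minimal sketch of that step; using LogisticRegression as the final estimator is an assumption consistent with the import, and the train/test variables come from the split sketched earlier:

# Sketch: stack the base learners with a logistic-regression meta-learner.
clf = StackingClassifier(estimators=base_learners, final_estimator=LogisticRegression())
clf.fit(x_train, y_train)
print(accuracy_score(y_test, clf.predict(x_test)))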
# AdaBoost with a shallow decision tree as the weak learner.
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

dt = DecisionTreeClassifier(max_depth=2, random_state=0)
# Note: scikit-learn >= 1.2 uses estimator= instead of base_estimator=.
adc = AdaBoostClassifier(base_estimator=dt, n_estimators=7, learning_rate=0.1, random_state=0)
adc.fit(x_train, y_train)   # x_train / y_train from the train/test split
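The snippet stops after fitting. A short follow-up showing how held-out accuracy could be checked, assuming the x_test/y_test split and the accuracy_score import from the earlier snippets:

# Evaluate the boosted ensemble on the held-out split.
y_pred = adc.predict(x_test)
print(accuracy_score(y_test, y_pred))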