This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Look up the single element addressed by this deeply nested CSS path.
// NOTE(review): the generated ids/class names and the :nth-child(21) index look
// like they were copied from a browser inspector and may be brittle - confirm
// they still match the target page.
document.querySelector(
  '#yDmH0d > c-wiz:nth-child(21) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div.LgQiCc.vOSR6b.RVNZdd.qtMyGd > content > div.khfFee > div > div.CC5fre'
);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Example list of URLs to inspect, one entry per line.
site_pages = [
    "https://www.ranksense.com/",
    "https://www.ranksense.com/how-it-works/",
    "https://www.ranksense.com/pricing/",
    "https://www.ranksense.com/blog/",
    "https://www.ranksense.com/products/organic-search-ads/feed/",
    "https://www.ranksense.com/additional-ways-to-use-chrome-developer-tools-for-seo/",
    "https://www.ranksense.com/empowering-a-new-generation-of-seos-with-python/",
]

# Run the awaitable returned by inspect_urls to completion on the current
# event loop and keep whatever it returns.
# NOTE(review): asyncio and inspect_urls are not defined in this snippet; they
# must already be in scope when this runs.
data = asyncio.get_event_loop().run_until_complete(inspect_urls(site_pages))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn as sk
from bs4 import BeautifulSoup

# Load only the columns that are used from each CSV export.
# NOTE(review): pd is not imported in this snippet; it must already be in scope.
img_counts = pd.read_csv(
    "/content/gdrive/My Drive/img_sizes.csv",
    usecols=["url", "img_src", "filesize", "width", "height"],
)
form_counts = pd.read_csv(
    "/content/gdrive/My Drive/form_counts.csv",
    usecols=["url", "form_count", "input_count"],
)

# Preview the first rows of each table without the identifying columns.
form_counts.head().drop("url", axis=1)
#outputs example data
img_counts.drop(["url", "img_src"], axis=1).head()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def img_size_group(size): | |
| max_size = 50000 | |
| #image size bins | |
| img_size_groups = [i for i in | |
| zip( | |
| [i for i in range(0, max_size, 1000)], | |
| [i for i in range(1000, max_size, 1000)] | |
| ) | |
| ] | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Join the form statistics with the image features on the shared "url" column.
# NOTE(review): onehot_img is presumably the one-hot encoded image-size table;
# it is built outside this snippet - confirm.
ml_data = form_counts.merge(onehot_img, on="url")

# Every page starts out unlabelled.
ml_data.loc[:, "group"] = "N/A"

# Derive the page-type label from the URL pattern.
page_urls = ml_data["url"]
is_product = page_urls.str.contains(r".*/products/.*|.*/product/.*")
is_category = page_urls.str.contains(r"/collections(?!.*/products.*)(?!.*/product.*)")
ml_data.loc[is_product, "group"] = "Products"
# Assigned last, so a URL matching both patterns ends up as "Category".
ml_data.loc[is_category, "group"] = "Category"

#splitting dataset into training and testing
group_features = ml_data.drop(["group", "url"], axis=1)
group_labels = ml_data["group"]
X_train, X_test, y_train, y_test = train_test_split(
    group_features,
    group_labels,
    test_size=0.2,
    random_state=42,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| names = [ | |
| "Naive Bayes", | |
| "Linear SVM", | |
| "Logistic Regression", | |
| "Random Forest", | |
| "Multilayer Perceptron" | |
| ] | |
| classifiers = [ | |
| MultinomialNB(), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_confusion_matrix(cm, classes, | |
| normalize=False, | |
| title='Confusion matrix', | |
| cmap=plt.cm.Blues): | |
| """ | |
| This function prints and plots the confusion matrix. | |
| Normalization can be applied by setting `normalize=True`. | |
| """ | |
| if normalize: | |
| cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
results = pd.DataFrame(rows, columns=["algorithm", "score", "params"])

# Keep, for each algorithm, the one row that achieved its highest score, so the
# "params" value shown really is the configuration that produced that score.
# (A column-wise groupby().max() would take the maximum of "score" and of
# "params" independently and could pair a score with unrelated params.)
# Assumes every algorithm has at least one non-NaN score.
best_row_labels = results.groupby("algorithm")["score"].idxmax()
results = results.loc[best_row_labels].reset_index(drop=True)

plot_data = []
# One bar per algorithm, with bar height equal to its best score.
bar = go.Bar(
    x=results["algorithm"].tolist(),
    y=results["score"].tolist(),
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| from urllib.parse import urlencode, urlparse, urlunparse, quote | |
| import pandas as pd | |
| def get_seo_branded_data(brand, domain, database="us", export_columns="Ph,Po,Nq,Ur,Tg,Td,Ts", display_limit=10000, display_filter="+|Ph|Co|{brand}"): | |
| global key | |
| url_params={"type": "domain_organic", | |
| "key": key, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): database is assigned here but not passed to the call below,
# which therefore relies on get_seo_branded_data's own default for it.
database = "us"
macys = "macys.com"
brand = "Tommy Hilfiger"

macys_df = get_seo_branded_data(
    brand,
    macys,
    export_columns="Ph,Po,Tg",  # only keyword, position and traffic
)

#we explicitly convert numbers to integers to be able to perform arithmetic operations later
convert_dict = {
    "Keyword": str,
    "Position": int,
    "Traffic": int,
}
macys_df = macys_df.astype(convert_dict)