A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
The t command-line Twitter tool is a great way to work with Twitter information in a spreadsheet.
Its homepage with good installation instructions is here:
https://github.com/sferik/t
And I've written some related instructions about how to get an authentication token from Twitter:
import this
Your humble guides (ping us):
| /* | |
| This script is meant to be used with a Google Sheets spreadsheet. When you edit a cell containing a | |
| valid CSS hexadecimal colour code (like #000 or #000000), the background colour will be changed to | |
| that colour and the font colour will be changed to the inverse colour for readability. | |
| To use this script in a Google Sheets spreadsheet: | |
| 1. go to Tools » Script Editor » Spreadsheet; | |
| 2. erase everything in the text editor; | |
| 3. change the title to "Set colour preview on edit"; |
| # Single selections using iloc and DataFrame | |
| # Rows: | |
| data.iloc[0] # first row of data frame (Aleshia Tomkiewicz) - Note a Series data type output. | |
| data.iloc[1] # second row of data frame (Evan Zigomalas) | |
| data.iloc[-1] # last row of data frame (Mi Richan) | |
| # Columns: | |
| data.iloc[:,0] # first column of data frame (first_name) | |
| data.iloc[:,1] # second column of data frame (last_name) | |
| data.iloc[:,-1] # last column of data frame (id) |
| # Multiple row and column selections using iloc and DataFrame | |
| data.iloc[0:5] # first five rows of dataframe | |
| data.iloc[:, 0:2] # first two columns of data frame with all rows | |
| data.iloc[[0,3,6,24], [0,5,6]] # 1st, 4th, 7th, 25th row + 1st 6th 7th columns. | |
| data.iloc[0:5, 5:8] # first 5 rows and 6th, 7th, 8th columns of data frame (county -> phone1). |
| # Select rows with index values 'Andrade' and 'Veness', with all columns between 'city' and 'email' | |
| data.loc[['Andrade', 'Veness'], 'city':'email'] | |
| # Select all rows from index 'Andrade' through 'Veness' (label slice, both ends inclusive), with just 'first_name', 'address' and 'city' columns | |
| data.loc['Andrade':'Veness', ['first_name', 'address', 'city']] | |
| # Change the index to be based on the 'id' column | |
| data.set_index('id', inplace=True) | |
| # select the row with 'id' = 487 | |
| data.loc[487] |
| import tweepy | |
| import csv | |
| import pandas as pd | |
| ####input your credentials here | |
| consumer_key = '' | |
| consumer_secret = '' | |
| access_token = '' | |
| access_token_secret = '' | |
| auth = tweepy.OAuthHandler(consumer_key, consumer_secret) |
| import requests | |
| import csv | |
| from documentcloud import DocumentCloud # https://documentcloud.readthedocs.io/en/latest/gettingstarted.html#installation | |
| # Install DocumentCloud Python Wrapper first: https://documentcloud.readthedocs.io/en/latest/index.html | |
| USERNAME = input('Username: ') | |
| PASSWORD = input('Password: ') | |
| client = DocumentCloud(USERNAME, PASSWORD) |