This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PDF Scanner, Shortener and Marker | |
import PyPDF2 | |
from PyPDF2 import PdfFileWriter,PdfFileReader | |
import os | |
# use path directly or deal with windows \ or /: | |
# pa=r"C:\Users\Dome\Desktop\nu\Wahlprogramme 2017\afd.pdf".replace("\\", "/") | |
# Part 1: PDF Scanner and Shortener |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pdf2wordcloud | |
# 1) pdf to text object | |
import os | |
import PyPDF2 | |
from PyPDF2 import PdfFileWriter | |
party="grüne" # and others: Linke, Grüne, SPD, FDP, CDU/CSU, AfD | |
pa= "C:/Users/Dome/Desktop/nu/Wahlprogramme 2017/" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. # Tweets to sentiment of tokenized row items (average) | |
2. # ATTENTION: BUGS WITH \x OR OTHER SIMILAR CHARACTERS (\n IS REPLACED ALREADY) | |
3. | |
4. import pandas as pd | |
5. import re | |
6. import os | |
7. os.chdir("C:/Users/Dome/Desktop/nu/Tweets/") | |
8. | |
9. party= "fdp" | |
10. df=pd.read_json(party+".json") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tweet to Twitter Birdcloud (Wordcloud) | |
import pandas as pd | |
import re | |
import os | |
os.chdir("C:/Users/Dome/Desktop/nu/Tweets/") | |
party= "linksfraktion" | |
df=pd.read_json(party+".json") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scatterplots with seaborn | |
import pandas as pd | |
import os | |
pd.set_option('precision', 10) # working with csv data with high precision, | |
# meaning more characters after the comma | |
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/") | |
# avoid confusion by converting number separators (comma to point) | |
ma = pd.read_csv("MASTERENG.csv",float_precision='round_trip') # high precision |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# heatmap with seaborn | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
sns.set(style="white") | |
import os | |
pd.set_option('precision', 10) | |
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# multi choropleth | |
import pandas as pd | |
import os | |
pd.set_option('precision', 10) | |
os.chdir("C:/Users/Dome/Desktop/nu/Wahl- und Strukturdaten/Tabellen/") | |
ma = pd.read_csv("MASTERENG.csv",float_precision='round_trip') | |
import folium | |
import webbrowser | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html> | |
<head> | |
<script> | |
// Display the element whose id is `shown` (display = 'block') and hide the
// element whose id is `hidden` (display = 'none'). Always returns false.
function show(shown, hidden) { | |
document.getElementById(shown).style.display='block'; | |
document.getElementById(hidden).style.display='none'; | |
return false; | |
} | |
</script> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# importing the necessary packages | |
import speech_recognition as sr | |
from selenium import webdriver | |
# initialise speech_recognition | |
r = sr.Recognizer() | |
mic = sr.Microphone() | |
# listen! | |
with mic as source: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# adapted code for Python 3 and latest Scikit-learn version 0.23 | |
# based on https://medium.com/mlreview/topic-modeling-with-scikit-learn-e80d33668730 | |
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer | |
from sklearn.decomposition import NMF, LatentDirichletAllocation | |
import numpy as np | |
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents): | |
for topic_idx, topic in enumerate(H): | |
print("Topic {}".format(topic_idx)) |
OlderNewer