Skip to content

Instantly share code, notes, and snippets.

View PandaWhoCodes's full-sized avatar

Thomas Ashish Cherian PandaWhoCodes

View GitHub Profile
@PandaWhoCodes
PandaWhoCodes / get_website_text.py
Created January 27, 2020 18:27
get text from a web page
from bs4 import BeautifulSoup
from bs4.element import Comment
import requests
def tag_visible(element):
if element.parent.name in ['script', 'style', 'head', 'title', 'meta', '[document]']:
return False
if isinstance(element, Comment):
return False
@PandaWhoCodes
PandaWhoCodes / word_to_vec_genism.py
Created January 27, 2020 18:25
Word2Vec using gensim
from gensim.models import Word2Vec
# from tests import get_all_text
def save_model():
# define training data
sentences = get_all_text()
# print(sentences)
# train model
model = Word2Vec(sentences, min_count=1)
@PandaWhoCodes
PandaWhoCodes / extract_companies.py
Created January 27, 2020 18:23
Extract companies from given twitter profiles
import sys
from utils import save_to_db, get_topics, get_companies, get_products
from tweet_collector.request import collector
import csv
import requests
def get_profiles_from_file(source):
"""
takes the source file location and extracts all the twitter profiles
@PandaWhoCodes
PandaWhoCodes / csv_to_tag_cloud.py
Created January 27, 2020 18:21
CSV to tag cloud HTML
import csv
def read_csv(filename="freq.csv"):
"""
reads a csv file and returns the object
:param filename: csv file to be read
:return: csv object
"""
csv_content = []
@PandaWhoCodes
PandaWhoCodes / image_ocr_preprocess.ipynb
Created January 27, 2020 18:20
code to imitate what camscanner does before conducting OCR on an image
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@PandaWhoCodes
PandaWhoCodes / fruit_classification.ipynb
Created January 27, 2020 18:17
1. A sample program for pickling a set of objects and restoring them. - load_sav_obj.py 2. A simple supervised ML app (not IRIS but uses some other data) along with data fruit_classification using various scikit-learn functions. 4. Building a simple classifier using NN from scratch. - Handwritten digit recognition building a simple NN using TFlearn…
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@PandaWhoCodes
PandaWhoCodes / filter_posts.py
Last active July 3, 2021 04:51
Parse and filter WordPress posts. Usage: python filter_posts.py [XML File name] [txt or html] [keyword1 keyword2 ....]
from parse import find_posts, set_filename
import sys
import os
import string
from bs4 import BeautifulSoup
def format_filename(s):
"""
Take a string and return a valid filename constructed from the string.
We can't make this file beautiful and searchable because it's too large.
_unit_id,_created_at,_golden,_id,_missed,_started_at,_tainted,_channel,_trust,_worker_id,_country,_region,_city,_ip,the_author_of_the_tweet_seems_to_be_an_eye_witness_of_the_event,type_of_message,nil,the_author_of_the_tweet_seems_to_be_an_eye_witness_of_the_event_gold,tweet,tweet_no,tweet_no_rt,type_of_message_gold,user
238841781,12/24/2012 13:41:45,false,787060207,,12/24/2012 13:37:36,false,instagc,0.7333,14425455,USA,MI,Rochester,69.136.129.135,,Informative: other,,,important --> @JebBush suggests federal gov't not crucial to storm recovery http://t.co/pVsV6qoS #haction #2012 #sandy,11899,important --> @JebBush suggests federal gov't not crucial to storm recovery http://t.co/pVsV6qoS #haction #2012 #sandy,,danholler
238841781,12/24/2012 13:44:17,false,787063467,,12/24/2012 13:39:45,false,instagc,0.8333,13441146,USA,GA,Cleveland,98.18.108.46,,"Informative: offers/gives donations of money, goods, or free services",,,important --> @JebBush suggests federal gov't not crucial to storm recovery http://t.
@PandaWhoCodes
PandaWhoCodes / pygz_intro_01.py
Last active November 25, 2019 08:02
Series of examples to get you started with PyGame Zero
"""
Code to open a pgz window
"""
import pgzrun
WIDTH = 800
HEIGHT = 500
BLACK = (0, 0, 0)
def draw(): # Pygame Zero draw function
import pgzrun
from random import randint, choice
import string
WIDTH = 800
HEIGHT = 500
VELOCITY = 1
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)
LETTER = {"letter": "", "x": 0, "y": 0}