This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for records grouped by colA, colB, and colC return a df where colD is unique: | |
import pyspark.sql.functions as fn | |
# Aggregate colD per (colA, colB, colC) group. The row-count filter has to be
# applied to the aggregated DataFrame: the 'count' column only exists once
# `agg` has run, and `.filter` is a DataFrame operation, not a Column one.
grouped = (
    df.groupBy('colA', 'colB', 'colC')
    .agg(
        fn.collect_list('colD').alias('newColD'),
        fn.count('colD').alias('count'),
    )
    .filter(fn.col('count') > 1)
)
# Explode from the aggregated result; the source `df` has no 'newColD' column.
# NOTE(review): collect_list keeps repeated colD values. If 'colDUniques' is
# meant to be de-duplicated, fn.collect_set is presumably wanted -- confirm.
grouped.select(fn.explode('newColD').alias('colDUniques')).show()
# given a subset of columns, return a dataframe where duplicates exist for these columns: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.http import Request | |
def parse_final_page(self, response):
    """Scrape the final page (placeholder: no scraping logic is implemented yet).

    Args:
        self: presumably the owning spider instance -- the enclosing class is
            not visible here; confirm.
        response: the response for the page to be scraped.

    Returns:
        None until the scraping logic is filled in.
    """
    # do scraping here:
def get_next_page(self, response, url_append): | |
new_url = response.url + url_append | |
req = Request( | |
url=new_url, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.spiders import CrawlSpider | |
from scrapy.loader.processors import Identity, TakeFirst | |
import logging | |
logger = logging.getLogger(__name__) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from glob import glob | |
from pathlib import Path | |
import time | |
start_path = Path(os.path.expanduser("~")) | |
"""Glob.""" | |
def list_comp_and_glob():
    """Collect every ``.mkv`` path found recursively below ``start_path``.

    The search is done with ``glob`` using a ``**`` pattern and
    ``recursive=True``; the matches are copied into a new list through a
    list comprehension.

    Returns:
        A list of the matching path strings.
    """
    # NOTE(review): the comprehension is kept as written; the function name
    # suggests it is an intentional part of what is being measured -- confirm.
    mkv_pattern = str(start_path) + '/**/*.mkv'
    return [match for match in glob(mkv_pattern, recursive=True)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# -*- coding: utf-8 -*- | |
"""vlc media player; based off example in vlc repo: | |
`http://git.videolan.org/?p=vlc/bindings/python.git;a=commit;h=HEAD` | |
See also: | |
`http://infohost.nmt.edu/tcc/help/pubs/tkinter/web/menu.html` | |
`http://infohost.nmt.edu/tcc/help/pubs/tkinter/web/menu-coptions.html` |