Skip to content

Instantly share code, notes, and snippets.

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import (
GCSToBigQueryOperator,
)
from airflow.providers.google.cloud.operators.bigquery import (
BigQueryExecuteQueryOperator,
BigQueryInsertJobOperator,
BigQueryTableCheckOperator,
)
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.google.cloud.operators.bigquery import (
BigQueryExecuteQueryOperator,
)
from datetime import datetime
from google.cloud import storage, bigquery
import requests
import zipfile
import os
from airflow.decorators import dag, task
from datetime import datetime, timedelta
import requests
import json
import os
from google.cloud import storage
from airflow.operators.bash import BashOperator
default_args = {
"email_on_failure": False,
import psycopg2
import os
def delete_old_records(conn, start_date, end_date):
delete_query = """DELETE FROM raw.public.stage_earthquake
WHERE dt BETWEEN %s AND %s
"""
cur = conn.cursor()
import psycopg2
import os
def delete_old_records(conn, csv_file_path):
# create the delete query
delete_query = "DELETE FROM earthquake WHERE filename=%s"
cur = conn.cursor()
cur.execute(delete_query, (csv_file_path,))
import requests
import pandas as pd
from datetime import datetime, timedelta
import os
def process_earthquake_data():
today_date = datetime.now()
timeDelta = timedelta(days=1)
import requests
from pdfminer.high_level import extract_text
from collections import Counter
import re
# Step 1: Download and save the PDF
pdf_url = "https://arxiv.org/pdf/1509.02971.pdf"
response = requests.get(pdf_url)
pdf_filename = "nasa_flight_plan.pdf"
version: '3'
services:
postgres:
image: postgres:13
environment:
- POSTGRES_USER=airflow
- POSTGRES_PASSWORD=airflow
- POSTGRES_DB=airflow
airflow-init:
# data_pipeline.py
from airflow import DAG
from airflow.providers.http.operators.http import HttpOperator
from airflow.operators.python import PythonOperator
import base64
from datetime import datetime, timedelta
import io
import zipfile
import pandas as pd
import pdfplumber
import PyPDF2
# Open the PDF file with PyPDF2
pdf_file = 'example.pdf'
pdf_reader = PyPDF2.PdfReader(pdf_file)
# Extract text with PyPDF2
full_text = ""