This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
import pandas as pd | |
from airflow.models import DAG | |
from airflow.operators.python import PythonOperator | |
from datetime import datetime | |
default_args = { | |
'start_date': datetime(year=2021, month=6, day=20) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_data(path: str, ti) -> None:
    """Write the transformed records pulled from XCom to a CSV file.

    Args:
        path: Destination path of the CSV file.
        ti: Task-instance-like object exposing ``xcom_pull``; the value
            stored under key ``'transformed_data'`` by the
            ``'transform_data'`` task is read from it.
    """
    # ``task_ids`` is passed as a list, so ``xcom_pull`` hands back a list
    # with one entry per task id; the first entry is the payload we want.
    pulled = ti.xcom_pull(key='transformed_data', task_ids=['transform_data'])
    frame = pd.DataFrame(pulled[0])
    # ``index=False`` keeps the positional row index out of the CSV.
    frame.to_csv(path, index=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def transform_data(ti) -> None: | |
data = ti.xcom_pull(key='extracted_data', task_ids=['extract_data'])[0] | |
transformed_data = [] | |
for item in data: | |
transformed_data.append({ | |
'sku': item.get("sku", ""), | |
'Name': item['name'], | |
'Price': item['price'].get("original"), | |
'Brand Name': item['brand_name'], | |
'Thumbnail': f"https://img01.ztat.net/article/{item['media'][0]['path']}", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_data(url: str, headers: dict, ti, timeout: float = 30.0) -> None:
    """Fetch the article list from ``url`` and publish it via XCom.

    Args:
        url: Endpoint to request with HTTP GET.
        headers: HTTP headers sent with the request.
        ti: Task-instance-like object exposing ``xcom_push``; the parsed
            ``'articles'`` value is stored under key ``'extracted_data'``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the task indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the JSON payload has no ``'articles'`` entry.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail on an error status here instead of trying to parse an error page.
    res.raise_for_status()
    articles = json.loads(res.content)['articles']
    ti.xcom_push(key='extracted_data', value=articles)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=UTF-8 | |
import pickle | |
import nltk | |
from nltk.corpus import brown | |
#TextBlob FastNPExtractor + ConllExtractor | |
# Textblob | |
from textblob import TextBlob | |
from textblob.np_extractors import FastNPExtractor | |
from textblob.np_extractors import ConllExtractor |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
from flask import app,Flask | |
from flask_restful import Resource, Api, reqparse | |
import elasticsearch | |
from elasticsearch import Elasticsearch | |
import datetime | |
import concurrent.futures | |
import requests | |
import json | |
except Exception as e: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from elasticsearch import helpers

# Bulk-index every action produced by gen(df) through the `es` client.
# `es`, `gen` and `df` are defined elsewhere in the script.
# request_timeout=300 is presumably in seconds; confirm against the client docs.
res = helpers.bulk(es, gen(df), request_timeout=300)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import uuid | |
def gen(df): | |
for i in df: | |
yield{ | |
"_index" : "my_med", | |
"_type" : "_doc", | |
"_id" : uuid.uuid4(), | |
"_source" : { | |
#"name": i.get("name"), | |
"name":i.get("name"), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setting ={ | |
"mappings" : { | |
"properties" : { | |
"Clean_Uses" : { | |
"type" : "text", | |
"fields" : { | |
"keyword" : { | |
"type" : "keyword", | |
"ignore_above" : 256 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Read the CSV into a DataFrame, then rebind `df` to the result of
# to_dict('records'): a list with one dict per row.
df = pd.read_csv('clean_data.csv')
df = df.to_dict('records')
NewerOlder