This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _preprocessing_fn(inputs: Dict[Text, Any], dense_float_feature_keys, input_feature_keys) -> Dict[Text, Any]: | |
outputs = {} | |
for key in [k for k in dense_float_feature_keys if k in input_feature_keys]: | |
outputs[transformed_name(key) | |
] = tft.scale_to_z_score(inputs[key]) | |
return outputs |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$project-name/ml/pipelines/ | |
├── __init__.py | |
├── data | |
├── util | |
│ ├── __init__.py | |
│ ├── input_fn_utils.py | |
│ └── model_utils.py | |
├── kfp_runner.py | |
├── local_beam_dag_runner.py | |
├── model_utils.py |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Τρία πουλάκια κάθονταν στου Διάκου το ταμπούρι | |
το 'να τηράει τη Λειβαδιά και τ' άλλο το Ζητούνι | |
το τρίτο το καλύτερο μοιρολογάει και λέει | |
Πολλή μαυρίλα πλάκωσε μαύρη σαν καλιακούδα | |
Μην ο Καλύβας έρχεται μην ο Λεβεντογιάννης | |
Νουδ' ο Καλύβας έρχεται νουδ' ο Λεβεντογιάννης | |
Ομέρ Βρυώνης πλάκωσε με δεκαοχτώ χιλιάδες | |
Ο Διάκος σαν τ' αγροίκησε πολύ του κακοφάνει | |
Ψηλή φωνή εσήκωσε τον πρώτο του φωνάζει | |
Τον ταϊφά μου σύναξε μάσε τα παλικάρια |
We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 12 columns, instead of 10 in line 5.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Timestamp,Πόσα χρόνια δουλεύεις επαγγελματικά ως προγραμματιστής;,Με τι είδος development ασχολείσαι επαγγελματικά αυτή την περίοδο;,Σε ποιες γλώσσες προγραμματισμού δουλεύεις επαγγελματικά αυτή την περίοδο;,Τι μέγεθος είναι η εταιρεία που δουλεύεις;,Ποιος είναι ο τρόπος εργασίας;,Έχεις άτομα υπό την επίβλεψη σου;,Έχεις προσωπικά projects ή κάνεις freelancing πέρα από την κύρια εργασία σου;,Σε ποια πόλη μένεις;,Σε ποια πόλη δουλεύεις;,Φύλλο;,Ποιος είναι ο ετήσιος καθαρός μισθός σου; | |
7/15/2020 12:03:11,4-5,"DevOps, Backend, Frontend","C#, JavaScript",11-50,Και τα δύο,Όχι,Ναι,Αθήνα,Αθήνα,Άντρας,18200 | |
7/15/2020 12:03:58,1-3,"Backend, Frontend","C#, JavaScript",11-50,Στον χώρο του εργοδότη,Όχι,Όχι,Δράμα,Καβάλα,Άντρας,13100 | |
7/15/2020 12:05:43,1-3,Full Stack Web Developer,"C#, JavaScript, PHP",11-50,Και τα δύο,Όχι,Όχι,Αθήνα,Αθήνα,Άντρας,5460 | |
7/15/2020 12:11:37,11+,Backend,"Java, C#",11-50,Και τα δύο,Ναι,Όχι,ΠΕΙΡΑΙΑΣ,ΠΕΙΡΑΙΑΣ,Άντρας,43500 | |
7/15/2020 12:12:45,1-3,Frontend,JavaScript,11-50,Και τα δύο,Όχι,Όχι,Αθήνα,Αθή |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fluent_tfx as ftfx | |
# Directory that contains this script (symlinks resolved).
current_dir = os.path.dirname(os.path.realpath(__file__))

# get the user code file; it is expected to sit next to this script
user_code_file = os.path.join(current_dir, 'model_code.py')
print(
    f'Using {user_code_file} for preprocessing, training and tuning functions')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Name under which the tunable choice is registered in _get_hyperparameters.
H_SIZE = 'h_size'


def _get_hyperparameters() -> kerastuner.HyperParameters:
    """Build the hyperparameter search space.

    Returns:
        A ``kerastuner.HyperParameters`` object holding a single choice
        parameter named by ``H_SIZE`` with the candidate values 5 and 10.
    """
    hp = kerastuner.HyperParameters()
    hp.Choice(H_SIZE, [5, 10])
    return hp
def _build_keras_model(hparams: kerastuner.HyperParameters) -> tf.keras.Model: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _gzip_reader_fn(filenames):
    """Return a ``tf.data.TFRecordDataset`` that reads ``filenames`` as GZIP-compressed records."""
    return tf.data.TFRecordDataset(filenames, compression_type='GZIP')
def _input_fn(file_pattern: List[Text], | |
tf_transform_output: tft.TFTransformOutput, | |
batch_size: int = 32) -> tf.data.Dataset: | |
transformed_feature_spec = ( | |
tf_transform_output.transformed_feature_spec().copy()) | |
dataset = tf.data.experimental.make_batched_features_dataset( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Feature key of the label (presumably the training target; confirm against the model code).
LABEL_KEY = 'lbl'
# Features that preprocessing_fn scales to a z-score and emits under '<name>_xf'.
DENSE_FEATURES = ['a', 'b']
# Features that preprocessing_fn handles in a separate loop.
BINARY_FEATURES = ['c']
def preprocessing_fn(inputs: Dict[Text, Any]) -> Dict[Text, Any]: | |
outputs = {} | |
for feat in DENSE_FEATURES: | |
outputs[f'{feat}_xf'] = tft.scale_to_z_score(inputs[feat]) | |
for feat in BINARY_FEATURES: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Connect to the SQLite metadata database stored under the bucket.
metadata_connection_config = metadata.sqlite_metadata_connection_config(
    os.path.join(bucket_uri, 'schema_generation', 'metadata.db'))  # defaults to this
with metadata.Metadata(metadata_connection_config) as store:
    # Fetch every artifact registered under the schema artifact type.
    schema_artifacts = store.get_artifacts_by_type(
        standard_artifacts.Schema.TYPE_NAME)
    # Keep the URI of the most recently updated schema artifact.
    # NOTE: max() raises ValueError when no schema artifact exists yet.
    latest_schema_uri = max(schema_artifacts, key=attrgetter(
        'last_update_time_since_epoch')).uri
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PIPELINE_ROOT = '<your project root>/bucket'  # pretend this is a storage bucket in the cloud
# The metadata database lives directly under the pipeline root.
METADATA_STORE = f'{PIPELINE_ROOT}/metadata_store.db'
STAGING = 'staging'
TEMP = 'temp'
# Left empty in this example; fill in before use.
PROJECT_ID = ''
JOB_NAME = ''
DATASET_PATTERN = 'taxi_dataset.csv'