Created
April 13, 2019 20:31
-
-
Save netskink/234349c94998c86a6d061f19fb938140 to your computer and use it in GitHub Desktop.
Can tf.data.experimental.CsvDataset work with a tab as the field separator?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
It looks like copying and pasting the .tsv file replaced its tabs with spaces. The original file can be downloaded from:
https://waterdata.usgs.gov/nwis/dv?cb_00045=on&cb_62620=on&format=rdb&site_no=0204288721&referred_module=sw&period=&begin_date=2018-04-12&end_date=2019-04-12
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import, division, print_function | |
from pathlib import Path | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import seaborn as sns | |
import tensorflow as tf | |
from tensorflow import keras | |
from tensorflow.keras import layers | |
# Report the TensorFlow version in use; the tf.data CSV APIs used below
# live under different namespaces depending on the release.
print(tf.__version__)
# The .tsv file is expected in a directory named "tsv" that sits next to
# the current working directory ('../tsv' is resolved against the working
# directory the script is launched from, not against this source file).
file_name = 'SN0204288721.tsv'
file_path = Path('../tsv') / file_name
# Number of leading lines in the file that are not data rows and must be
# skipped before parsing:
#   lines 1-31: comment lines
#   line 32:    column headings
#   line 33:    column format codes (5s 15s 20d 14n ...)
# Both header lines are skipped along with the comments.
num_lines_to_skip = 33
# Column dtypes of one data row, in file order.
# NOTE(review): parsing site_no as int64 drops the leading zero seen in the
# file name ("0204288721") -- confirm that is acceptable downstream.
_COLUMN_DTYPES = (
    tf.string,   # usgs
    tf.int64,    # site_no
    tf.string,   # date_time
    tf.float32,  # Tidal High
    tf.string,   # Tidal High qual code
    tf.float32,  # Tidal Low
    tf.string,   # Tidal Low qual code
    tf.float32,  # Precipitation (inches)
    tf.string,   # Precip qual code
)


def _parse_tsv_line(line):
    """Parse one tab-separated data row into a tuple of typed scalar tensors.

    Args:
        line: scalar string tensor holding a single line of the file.

    Returns:
        A tuple with one tensor per entry of ``_COLUMN_DTYPES``, in order.
    """
    # An empty tensor of the column dtype marks the column as required,
    # matching what a bare dtype means in CsvDataset's record_defaults.
    required_columns = [
        tf.constant([], dtype=dtype) for dtype in _COLUMN_DTYPES
    ]
    columns = tf.io.decode_csv(
        line,
        record_defaults=required_columns,
        field_delim='\t',  # the file is tab separated
    )
    # Return a tuple so tf.data keeps one component per column instead of
    # trying to pack the mixed-dtype list into a single tensor.
    return tuple(columns)


# Read the file as raw text lines first, drop the comment/header preamble,
# and only then parse the remaining rows as tab-separated records.
# CsvDataset(...).skip(n) cannot be used for this: it parses every record
# before skip() discards it, so the preamble lines fail to parse against
# the nine-column schema, and its `header` flag can only drop one line.
text_lines_ds = (
    tf.data.TextLineDataset(str(file_path))
    .skip(num_lines_to_skip)
    .map(_parse_tsv_line)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment