Skip to content

Instantly share code, notes, and snippets.

@Swarchal
Created January 17, 2020 10:29
Show Gist options
  • Save Swarchal/cdc36a08a36e6db4db0c6d989a2bd688 to your computer and use it in GitHub Desktop.
Save Swarchal/cdc36a08a36e6db4db0c6d989a2bd688 to your computer and use it in GitHub Desktop.
parse metadata from yokogawa CV{7,8}000 filepaths
import os
from collections import namedtuple
from typing import NamedTuple, List
import pandas as pd
class Yoko(NamedTuple):
    """Metadata extracted from one Yokogawa CV7000/CV8000 image filename."""

    well: str
    site: int
    z: int
    channel: int
    filepath: str


def parse_filepath(filepath: str) -> Yoko:
    """
    Parse well, site, z-plane and channel out of a Yokogawa image path.

    The final path component is split on "_"; the second-to-last piece is
    taken as the well and the last piece is a fixed-width code laid out as:

        index: 0 | 1-4  | 5 | 6-8 | 9 | 10-11 | 12 | 13-14 | 15 | 16-17 | 18 | 19-20
        char:  T | 0001 | F | 006 | L | 01    | A  | 04    | Z  | 01    | C  | 02

    Only the digits after F (site), Z (z) and C (channel) are used; the
    remaining fields are ignored.

    Args:
        filepath: path (or bare filename) of a ".tif" image.

    Returns:
        A ``Yoko`` named tuple ``(well, site, z, channel, filepath)`` where
        ``filepath`` is the argument exactly as it was passed in.

    Raises:
        ValueError: if the filename has no "_" separated well/code parts or
            the numeric fields cannot be converted to integers.

    Example:
        >>> parse_filepath("test_N22_T0001F006L01A04Z01C02.tif")
        Yoko(well='N22', site=6, z=1, channel=2, filepath='test_N22_T0001F006L01A04Z01C02.tif')
    """
    final_path = os.path.basename(filepath)
    parts = final_path.split("_")
    if len(parts) < 2:
        raise ValueError(
            f"cannot parse Yokogawa metadata from {filepath!r}: "
            "expected '<name>_<well>_<code>.tif'"
        )
    well, rest = parts[-2], parts[-1]
    # strip the extension only where it is a suffix, never mid-string
    suffix = ".tif"
    if rest.endswith(suffix):
        rest = rest[: -len(suffix)]
    try:
        site = int(rest[6:9])
        z = int(rest[16:18])
        channel = int(rest[-2:])
    except ValueError as err:
        raise ValueError(
            f"cannot parse Yokogawa metadata from {filepath!r}"
        ) from err
    # a single shared record type keeps results comparable and picklable
    return Yoko(well, site, z, channel, filepath)
def clean_paths(paths: List[str]) -> List[str]:
    """
    Filter a list of paths down to the ones that look like image files.

    A path is kept when it ends with ".tif", contains no "#" character
    anywhere, and its final component splits into exactly three
    "_"-separated pieces. The filter is heuristic and makes strong
    assumptions about the naming scheme.

    Fails an assertion if no path survives the filter.
    """

    def _is_wanted(candidate: str) -> bool:
        # cheap whole-path checks first, then inspect the final component
        if not candidate.endswith(".tif"):
            return False
        if "#" in candidate:
            return False
        basename = candidate.split(os.sep)[-1]
        return len(basename.split("_")) == 3

    kept = [candidate for candidate in paths if _is_wanted(candidate)]
    assert len(kept) >= 1
    return kept
def create_metadata_dataframe(data_dir: str) -> pd.DataFrame:
    """
    Build a dataframe of image filepaths and their parsed metadata.

    Lists the entries of ``data_dir`` (non-recursively), joins each with
    ``data_dir``, filters them with ``clean_paths`` and parses every
    remaining path with ``parse_filepath``.

    Args:
        data_dir: directory containing the image files.

    Returns:
        A dataframe built from the tuples returned by ``parse_filepath``,
        one row per retained file.

    Raises:
        AssertionError: if ``data_dir`` is not a directory or is empty.
    """
    assert os.path.isdir(data_dir), f"{data_dir} not found"
    all_paths = os.listdir(data_dir)
    # a directory holding a single file is valid; only an empty one is not
    assert len(all_paths) >= 1, f"no files found in {data_dir}"
    full_paths = [os.path.join(data_dir, name) for name in all_paths]
    tuple_list = [parse_filepath(path) for path in clean_paths(full_paths)]
    return pd.DataFrame(tuple_list)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment