Created
October 20, 2011 00:26
-
-
Save amorton/1300076 to your computer and use it in GitHub Desktop.
sstable_link
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # encoding: utf-8 | |
| """ | |
| Creates a directory of sym links to SSTables with a number higher than | |
| a specified value. | |
| Handy when you want to copy a delta. | |
| See command line help for usage. | |
| """ | |
| import argparse | |
| import glob | |
| import logging | |
| import os.path | |
| import sys | |
| logging.basicConfig(level=logging.DEBUG) | |
| log = logging.getLogger(__file__) | |
def abs_out_dir(out_dir):
    """Gets the absolute out dir and ensure it is created.

    The directory (and any missing parents) is created when it does not
    exist yet. Only non-directory entries directly inside the directory
    count as existing files; sub directories are ignored.

    :param out_dir: out dir, may be relative to the current directory.
    :raises RuntimeError: Directory has existing files.
    :returns: abs path to put the links in.
    """
    abs_dir = os.path.abspath(out_dir)
    if not os.path.exists(abs_dir):
        os.makedirs(abs_dir)
    log.info("Absolute path is %s", abs_dir)

    # Only the top level listing is needed, so take the first os.walk()
    # entry. The next() builtin works on Python 2.6+ and Python 3, while
    # the generator's .next() method only exists on Python 2.
    _, _, existing_files = next(os.walk(abs_dir))
    log.info("Existing link files %s", existing_files)
    if existing_files:
        raise RuntimeError("Directory %s has existing files" % abs_dir)
    return abs_dir
def find_keyspace_dirs(keyspace, data_dirs, snapshot_name):
    """Find the keyspace directories to link the sstables from.

    Every data dir contributes ``<data_dir>/<keyspace>``, or
    ``<data_dir>/<keyspace>/snapshots/<snapshot_name>`` when a snapshot
    name is given. All of the resulting directories must exist.

    :param keyspace: Keyspace name.
    :param data_dirs: comma separated list of data dirs.
    :param snapshot_name: Optional snapshot to get the files from, an empty
        value means use the live keyspace directory.
    :raises RuntimeError: Dirs do not exist.
    :returns: list of keyspace directories.
    """
    ks_dirs = [
        os.path.join(data_dir, keyspace)
        for data_dir in data_dirs.split(",")
    ]
    if snapshot_name:
        dirs = [
            os.path.join(base, "snapshots/%s" % snapshot_name)
            for base in ks_dirs
        ]
    else:
        dirs = ks_dirs

    not_exist = [d for d in dirs if not os.path.exists(d)]
    if not_exist:
        # Interpolate here: extra positional arguments given to an exception
        # are only stored in .args, they are never %-formatted into the
        # message the way logging calls do.
        raise RuntimeError("Keyspace dirs do not exist %s" % (not_exist,))
    log.debug("Keyspace dirs %s", dirs)
    return dirs
def matching_sstables(keyspace_dirs, column_family, sstable_number):
    """Find the sstable files we want to link from.

    Only files directly inside each keyspace dir are considered. Compacted
    markers and files that belong to other column families are skipped.

    :param keyspace_dirs: List of keyspace dirs to check.
    :param column_family: Column Family to get files from.
    :param sstable_number: Find sstables that are higher than this.
    :raises ValueError: A file of the column family has a name that cannot
        be parsed as an sstable component.
    :return: List of sstable paths, each the keyspace dir joined with the
        file name.
    """

    def split_filename(filename):
        # e.g. TwitterUserHistoricalValues-20983-Data.db
        # e.g. TwitterUserHistoricalValues-g-20983-Data.db
        # returns a tuple of (cf_name, file_version, file_number, component)
        parts = []
        state = "initial"
        for token in filename.split("-"):
            if state == "initial":
                parts.append(token)
                state = "cf_name"
            elif state == "cf_name":
                # may be a number or a version identifier.
                if token.isalpha():
                    # version
                    parts.append(token)
                    state = "version"
                else:
                    # is file num, version is 0.6
                    parts.append("h")
                    parts.append(int(token))
                    state = "file_num"
            elif state == "version":
                # is file num
                parts.append(int(token))
                state = "file_num"
            elif state == "file_num":
                # component, e.g. data, filter, index
                if not token.endswith(".db"):
                    raise ValueError(
                        "Unexpected component %r in file name %r"
                        % (token, filename))
                parts.append(token.replace(".db", ""))
                state = "finished"
        # Raise rather than assert so the check survives python -O, and so
        # every malformed name fails the same way int() does above.
        if state != "finished":
            raise ValueError("Incomplete sstable file name %r" % (filename,))
        return tuple(parts)

    # Match on the name plus its separator, otherwise a column family whose
    # name merely starts with column_family would be picked up as well.
    cf_prefix = column_family + "-"

    match_files = []
    for ks_dir in keyspace_dirs:
        # First os.walk() entry is the top level listing; the next() builtin
        # works on both Python 2.6+ and Python 3.
        _, _, files = next(os.walk(ks_dir))
        for f in files:
            # skip any compacted markers
            if f.endswith("-Compacted"):
                log.debug("Skipping compacted marker %s", f)
                continue
            if not f.startswith(cf_prefix):
                log.debug("Not from cf %s skipping %s", column_family, f)
                continue

            parts = split_filename(f)
            log.debug("For file %s got parts %s", f, parts)
            # parts[2] is the sstable number
            if parts[2] <= sstable_number:
                log.debug("File %s not newer than %s skipping", f,
                          sstable_number)
                continue
            match_files.append(os.path.join(ks_dir, f))

    log.info("Matched files %s", match_files)
    return match_files
def link_files(out_dir, files):
    """Create a sym link in out_dir for each of the files.

    Every link is named after the base name of the file it points to.

    :param out_dir: dir to put the sym links in.
    :param files: Files to link to.
    :returns: Nada
    """
    for target in files:
        link_path = os.path.join(out_dir, os.path.basename(target))
        log.debug("Linking %s to %s", link_path, target)
        os.symlink(target, link_path)
def get_parser():
    """Build the command line parser.

    Positional arguments are ``keyspace``, ``column_family`` and
    ``file_number`` (parsed as an int). ``--out_dir``, ``--data_dir`` and
    ``--snapshot`` are optional and default to ``./linked``,
    ``/var/lib/cassandra/data`` and the empty string.

    :returns: the configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(
        prog="SSTable Linker",
        description="Creates a directory with links to SSTables created "
                    "after a specified number.")
    parser.add_argument('keyspace', metavar="keyspace", type=str,
                        help='Keyspace to link sstables from.')
    parser.add_argument('column_family', type=str,
                        help='Column Family to link sstables from.')
    parser.add_argument('file_number', type=int,
                        help='SSTable File number, get all files after '
                             'this number')
    parser.add_argument("--out_dir", type=str, default="./linked",
                        help="Directory to put the links in.")
    parser.add_argument("--data_dir", type=str,
                        default="/var/lib/cassandra/data",
                        help="Data directory(s), comma separated.")
    parser.add_argument("--snapshot", type=str,
                        default="",
                        help="Optional snapshot name to use.")
    return parser
def main():
    """Command line entry point: parse the arguments and create the links.

    :raises RuntimeError: No sstables matched the request.
    :returns: 0, used as the process exit code.
    """
    args = get_parser().parse_args()
    log.debug("Got args %s", args)

    link_dir = abs_out_dir(args.out_dir)
    source_dirs = find_keyspace_dirs(
        args.keyspace, args.data_dir, args.snapshot)
    sstables = matching_sstables(
        source_dirs, args.column_family, args.file_number)
    if sstables:
        link_files(link_dir, sstables)
        return 0
    raise RuntimeError("No matching files.")


if __name__ == "__main__":
    sys.exit(main() or 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment