Created
May 26, 2018 05:10
-
-
Save fabrizioc1/2ad760a4cd83c1742f0d3d2e6f3da8ad to your computer and use it in GitHub Desktop.
Expand hadoop path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools
import re
def expand_hadoop_path(path):
    """Expand every ``{{a,b,...}}`` group in *path* into concrete paths.

    Each double-brace group lists comma-separated alternatives.  The result
    holds one path for every combination of alternatives (the Cartesian
    product of all groups).  Groups are substituted by position, so the same
    group text appearing twice is expanded independently each time.

    Args:
        path: Path template, e.g. ``'s3://bucket/{{hi,lo}}/{{bid,nobid}}/'``.

    Returns:
        A list of expanded path strings in which earlier groups vary
        fastest.  A path without any group is returned unchanged as a
        one-element list.
    """
    # With a single capture group, re.split alternates literal text (even
    # indexes) with the captured group bodies (odd indexes).
    pieces = re.split(r'\{\{([^}]+)\}\}', path)
    literals = pieces[0::2]
    alternatives = [body.split(',') for body in pieces[1::2]]

    expanded_paths = []
    # product() varies its last argument fastest; feed it the groups in
    # reverse so the first group in the path is the one that varies fastest.
    for reversed_choice in itertools.product(*reversed(alternatives)):
        choice = reversed_choice[::-1]
        parts = [literals[0]]
        for value, literal in zip(choice, literals[1:]):
            parts.append(value)
            parts.append(literal)
        expanded_paths.append(''.join(parts))
    return expanded_paths
# Self-check data: each path template maps to the paths it must expand to.
testing_data = {
    's3://bucket/{{highlq,lowlq}}/201804/{{bid,nobid}}/': [
        's3://bucket/highlq/201804/bid/',
        's3://bucket/lowlq/201804/bid/',
        's3://bucket/highlq/201804/nobid/',
        's3://bucket/lowlq/201804/nobid/',
    ],
    's3://bucket/{{highlq,midlq,lowlq}}/201804/{{bid,nobid}}/': [
        's3://bucket/highlq/201804/bid/',
        's3://bucket/midlq/201804/bid/',
        's3://bucket/lowlq/201804/bid/',
        's3://bucket/highlq/201804/nobid/',
        's3://bucket/midlq/201804/nobid/',
        's3://bucket/lowlq/201804/nobid/',
    ],
    's3://bucket/{{highlq,lowlq}}/2018{{03,04}}/{{bid,nobid}}/': [
        's3://bucket/highlq/201803/bid/',
        's3://bucket/lowlq/201803/bid/',
        's3://bucket/highlq/201804/bid/',
        's3://bucket/lowlq/201804/bid/',
        's3://bucket/highlq/201803/nobid/',
        's3://bucket/lowlq/201803/nobid/',
        's3://bucket/highlq/201804/nobid/',
        's3://bucket/lowlq/201804/nobid/',
    ],
}

for (path, expected) in testing_data.items():
    # Compared as sets: only membership of the expansion is checked, not order.
    # The message is built with str.format; Python does not interpolate "#{...}".
    assert set(expand_hadoop_path(path)) == set(expected), \
        "expand_hadoop_path failed on {0}".format(path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment