Skip to content

Instantly share code, notes, and snippets.

@shiumachi
Created November 29, 2018 01:56
Show Gist options
  • Save shiumachi/11d9bb11547f9fc316d989cbe18ec74b to your computer and use it in GitHub Desktop.
Save shiumachi/11d9bb11547f9fc316d989cbe18ec74b to your computer and use it in GitHub Desktop.
from convert_to_datetime import convert_to_datetime
def add_date_to_log(line):
    """Prefix a log line with an hour-granularity date tag.

    The first two whitespace-separated fields of ``line`` (for example
    ``2014-01-17 12:21:44,327``) are joined with a single space and handed
    to ``convert_to_datetime``.  The year, month, day and hour of the result
    are rendered as a fixed-width ``YYYYMMDDhh`` tag.

    Argument:
        line (hadoop log line)

    Returns:
        The tag, a single space, then ``line`` exactly as it was passed in
        (trailing whitespace is not stripped from the returned text).

    NOTE(review): assumes ``convert_to_datetime`` returns an object with
    integer ``year``/``month``/``day``/``hour`` attributes (datetime-like)
    -- confirm against that helper.
    """
    fields = line.rstrip().split()
    date_string = ' '.join(fields[0:2])
    dt = convert_to_datetime(date_string)
    # Include the month and zero-pad every component: without this, day 1 /
    # hour 12 and day 11 / hour 2 collapse to the same tag, and the same day
    # of different months is indistinguishable.
    hour_tag = "{0:04d}{1:02d}{2:02d}{3:02d}".format(
        dt.year, dt.month, dt.day, dt.hour)
    return "{0} {1}".format(hour_tag, line)
if __name__ == '__main__':
    # Manual demo: tag one sample NameNode log line and print both the
    # original and the tagged version.
    nn_line = "2014-01-17 12:21:44,327 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addStoredBlock: blockMap updated: 192.168.0.1:50010 is added to blk_-2607434453651253239_8145228 size 125334"
    print("NN log: {0}".format(nn_line))
    # Call the function this module actually defines; the previous name
    # (add_yeardayhour_to_log) does not exist and raised NameError.
    print("add_date_to_log: {0}".format(add_date_to_log(nn_line)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment