ortsed · January 12, 2020 01:51
diff --git a/merge_similar.py b/merge_similar.py
 import pandas as pd
 def merge_similar(files=None, encoding=None):
     """Concatenate CSV datasets whose columns overlap but need not match.

     Every file is read with ``pd.read_csv``. A column that is absent from a
     file is added to that file's frame filled with ``None``, so the result
     carries the union of all columns, ordered by first appearance.

     Args:
         files: Iterable of paths or file-like objects accepted by
             ``pd.read_csv``. ``None`` (the default) is treated as empty.
         encoding: Passed through to ``pd.read_csv``.

     Returns:
         The concatenated DataFrame; rows keep the index of the frame they
         came from. An empty list is returned when no files are given,
         which is what earlier versions of this function returned.
     """
     # A None default avoids sharing one mutable list object across calls.
     sources = () if files is None else files
     frames = [pd.read_csv(file, encoding=encoding) for file in sources]
     if not frames:
         return []

     # dict.fromkeys de-duplicates while keeping first-seen order, so the
     # column layout is deterministic (a set would not guarantee that).
     all_cols = list(
         dict.fromkeys(col for frame in frames for col in frame.columns)
     )

     aligned = []
     for frame in frames:
         for col in all_cols:
             if col not in frame.columns:
                 frame[col] = None
         # Same column order everywhere keeps concat from re-aligning.
         aligned.append(frame[all_cols])

     # Frames are collected first and concatenated once: a header-only file
     # (zero rows) still contributes its columns, and rows are not copied
     # again for every additional file.
     return pd.concat(aligned)
	import pandas as pd
	def merge_similar(files=None, encoding=None):
	    """Concatenate CSV datasets whose columns overlap but need not match.

	    Every file is read with ``pd.read_csv``. A column that is absent from a
	    file is added to that file's frame filled with ``None``, so the result
	    carries the union of all columns, ordered by first appearance.

	    Args:
	        files: Iterable of paths or file-like objects accepted by
	            ``pd.read_csv``. ``None`` (the default) is treated as empty.
	        encoding: Passed through to ``pd.read_csv``.

	    Returns:
	        The concatenated DataFrame; rows keep the index of the frame they
	        came from. An empty list is returned when no files are given,
	        which is what earlier versions of this function returned.
	    """
	    # A None default avoids sharing one mutable list object across calls.
	    sources = () if files is None else files
	    frames = [pd.read_csv(file, encoding=encoding) for file in sources]
	    if not frames:
	        return []

	    # dict.fromkeys de-duplicates while keeping first-seen order, so the
	    # column layout is deterministic (a set would not guarantee that).
	    all_cols = list(
	        dict.fromkeys(col for frame in frames for col in frame.columns)
	    )

	    aligned = []
	    for frame in frames:
	        for col in all_cols:
	            if col not in frame.columns:
	                frame[col] = None
	        # Same column order everywhere keeps concat from re-aligning.
	        aligned.append(frame[all_cols])

	    # Frames are collected first and concatenated once: a header-only file
	    # (zero rows) still contributes its columns, and rows are not copied
	    # again for every additional file.
	    return pd.concat(aligned)
No results found