Last active
January 16, 2022 08:36
-
-
Save Umair444/e8305d12342ebb2e400251f66de3ed95 to your computer and use it in GitHub Desktop.
Normalize Data into Different Tables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Normalize the multi-valued columns of original.csv into separate tables.
%
% For each selected column, every comma-separated cell is split so that each
% (show_id, value) pair becomes its own row; the resulting table is written
% to "<column name>.csv" in the current folder. Afterwards two numeric
% columns are derived from T.duration.
T = readtable('original.csv');
% n = cellfun(@(cIn) strsplit(cIn, ','), T.country, 'UniformOutput', false);

% Placeholders for the (currently disabled) in-memory collection of results;
% see the commented-out C{count}/U{count} assignments inside the loop.
C = {0,0,0,0}; count = 0; U = {0,0,0,0};

for colIdx = [4, 5, 6, 11] % Director, Cast, Countries and Genre
    colName = T.Properties.VariableNames{colIdx};
    tab = T{:, colIdx};

    % Column 1: show ids, column 2: the list of values obtained by
    % splitting each cell on the literal separator ', '.
    n = [T.show_id regexp(tab, ', ', 'split')];

    % Repeat each show id once per split value so that the ids line up
    % with the flattened value list built below.
    len = cellfun(@numel, n(:,2));
    repeat = repelem(n(:,1), len);

    % Keep only the text after the first "s" and convert it to a number
    % (assumes ids look like "s<number>" -- TODO confirm against the data).
    repeat = str2double(extractAfter(repeat, "s"));

    % One row per (show_id, value) pair.
    t = [table(repeat, 'VariableNames', {'show_id'}) cell2table([n{:,2}]', ...
        'VariableNames', string(colName))];

    % Drop rows whose value is empty.
    t(t{:,2} == "", :) = [];

    % Create a unique table with an autonumeric integer index.
    % NOTE(review): t_u is built but never written or stored; only t is
    % saved below. Confirm whether the lookup table should be persisted too.
    t_u = unique(t{:, 2});
    idx = (1:length(string(t_u)))';
    t_u = table(idx, string(t_u), 'VariableNames', {'ID', t.Properties.VariableNames{2}});

    count = count + 1;
    % C{count} = t;
    % U{count} = t_u;

    writetable(t, string([colName, '.csv']));
    % Create a key relationship table
end
clear colIdx colName count idx len n repeat;

% Column Creation
% Numeric value of the text preceding "min" in T.duration.
duration = str2double(extractBefore(T.duration, "min"));
% Numeric value of the text preceding "season" (case-insensitive) in T.duration.
seasons = str2double(extractBefore(lower(T.duration), "season"));
% coutries = unique(n);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sample
https://1drv.ms/u/s!AhbQPfViz6nIh6k8fvaCXiDaiZ3DFA?e=O1fkeW