AayushSameerShah · May 27, 2021 11:19 · AayushSameerShah · May 27, 2021
diff --git a/categorize_multiple2.py b/categorize_multiple2.py
 '''Example data

    Name       Genre
 0   TENET      Action|Thriller
 1   MEMENTO    Crime|Thriller|Action
 2   AVENGERS   Children's
 '''
 # SPOILER ALERT: This walks through the UNDERLYING method by hand. In practice, just use df.Genre.str.get_dummies("|") for the same result (more on this later).

 # Step 1: Collect every distinct genre label, in order of first appearance.
 gens = []
 for gen in df.Genre:
     gens.extend(gen.split("|"))
 # Pass an ndarray rather than a plain list: newer pandas versions deprecate
 # list input to pd.unique.
 gens = pd.unique(np.array(gens, dtype=object))


 # Step 2: Build an all-zero indicator frame (one row per movie, one column per genre).
 # np.zeros takes the shape as a single tuple; `columns` belongs to the DataFrame.
 zero_one = pd.DataFrame(np.zeros((len(df), len(gens))), columns=gens)


 # Step 3: MAIN - Use .get_indexer to turn each movie's genres into column
 # positions, then flag those cells with 1.
 for i, gen in enumerate(df.Genre):
     indices = zero_one.columns.get_indexer(gen.split("|"))
     zero_one.iloc[i, indices] = 1
                     
 # DONE!

 '''Now the MORE ON THIS LATER part:

 The one-liner
 df.Genre.str.get_dummies("|")
 gives the same 0/1 indicator table in a single line!

 Use that... and if you want to learn the internals, work through the step-by-step code above!
 '''
	'''Example data

	    Name       Genre
	0   TENET      Action|Thriller
	1   MEMENTO    Crime|Thriller|Action
	2   AVENGERS   Children's
	'''
	# SPOILER ALERT: This walks through the UNDERLYING method by hand. In practice, just use df.Genre.str.get_dummies("|") for the same result (more on this later).

	# Step 1: Collect every distinct genre label, in order of first appearance.
	# The separator is a bare "|"; an escaped "\|" would never match the data.
	gens = []
	for gen in df.Genre:
	    gens.extend(gen.split("|"))
	# Pass an ndarray rather than a plain list: newer pandas versions deprecate
	# list input to pd.unique.
	gens = pd.unique(np.array(gens, dtype=object))


	# Step 2: Build an all-zero indicator frame (one row per movie, one column per genre).
	# np.zeros takes the shape as a single tuple; `columns` belongs to the DataFrame.
	zero_one = pd.DataFrame(np.zeros((len(df), len(gens))), columns=gens)


	# Step 3: MAIN - Use .get_indexer to turn each movie's genres into column
	# positions, then flag those cells with 1.
	for i, gen in enumerate(df.Genre):
	    indices = zero_one.columns.get_indexer(gen.split("|"))
	    zero_one.iloc[i, indices] = 1

	# DONE!

	'''Now the MORE ON THIS LATER part:

	The one-liner
	df.Genre.str.get_dummies("|")
	gives the same 0/1 indicator table in a single line!

	Use that... and if you want to learn the internals, work through the step-by-step code above!
	'''