catherio · October 27, 2010 22:31 · catherio · Oct 27, 2010
diff --git a/sandbox.py b/sandbox.py
 from csc import divisi2

 # ConceptNet: load the English graph and build a sparse matrix from it
 # over its 'nodes' and 'features'.
 cnet_graph = divisi2.load('data:graphs/conceptnet_en.graph')
 A = divisi2.network.sparse_matrix(
     cnet_graph, 'nodes', 'features', cutoff=5
 )

 # AnalogySpace: run an SVD on the normalized matrix.
 U, S, V = A.normalize_all().svd()
 # `predictions` is the lazy reconstruction from the three factors.
 # The non-lazy form, A_approx = divisi2.dot(U*S, V.T), is the 'real deal'
 # but was noted as very slow if A is big, so it stays disabled.
 predictions = divisi2.reconstruct(U, S, V)

 # Second SVD, this time on U, to find the weighted concept-vector
 # (i.e. the feature) of the most variance.
 # "efspace" stands for "eigenfeaturespace"; to_sparse() is called so that
 # the cleverer SVD math is used.
 efspaceU, efspaceS, efspaceV = (
     U.normalize_all().to_sparse().svd(1)
 )
 most_variance_feature = efspaceU.T[0]

 # Send the eigenfeature through V*S*U^T, i.e. dot it with all other
 # features, to measure how much each natural-language feature "matches"
 # the eigenfeature.
 closest_features = V.dot(
     U.T.dot(most_variance_feature).multiply(S)
 )

 # First question to ask: the natural-language feature that lies closest
 # to the direction of most variance.
 first_question = closest_features.top_items(1)[0][0]


 # GAME PLAN:
 #
 # At any stage of the game, we have a matrix of concepts-to-features,
 # with the concept vectors weighted by how much they've been matching
 # at this stage of the game. I call these "w-concepts" for "weighted concepts"

 # ACTUAL DETAILS ARE FROM DIVISI, NOT DIVISI2!
 # 1. Do an SVD of analogyspace over features to find the first eigenfeature
 #  - normalized or not?
 #     -> In Divisi2, normalize_all method
 #  - how can I get at the v's?
 #     -> .v, then SVD the way you would conceptnet
 #     -> v, not weighted_v, because if weighted with sigma then output re-SVD'd would not necessarily be the same (THIS IS NOT ACTUALLY RELEVANT)
 #  - how can I see the axes, in order, of an SVD2DResults?
 #     -> .summarize() will print them, or .summarize_axis(0) for the first axis
 #     -> Try efspace.u[:, 0].top_items(1)[0][0]
 #     -> For opposites, do efspace.u[:,0].top_items(1, largest=False)[0][0]

 # IDEA: flip between top and bottom items, and vary the first "0" to 1, 2, to grab nearby possibilities.
 #  - NEEDS ANSWER: how can I get the thing pointing most nearly in the right DIRECTION? Isn't that the one that's most like the eigenfeature? Do I want weighted? Weighted seems dramatically different
 #     -> strong, off-angle one has more information about it
 #     -> small, on-angle one is less known
 #     -> try both, but probably the first one
 #  - why are its dimensions 12976 by 121488, not 100 by something? still don't quite understand

 # 2. Ask about the first eigenfeature
 #    -> the commands above give questions like ('right', u'HasProperty', u'bad'), which are already in near-perfect format

 # 3. Given a yes/no answer about the eigenfeature asked, choose a weight
 #    function which takes a magnitude and returns either a sigmoid or a
 #    backwards sigmoid of that magnitude. (T/F -> function)
 #    - need SciPy to get the erf function, methinks
 # Idea: "probably" and "probably not" can trigger wider sigmoid functions

 # For each concept:
 # 4. Calculate the projection of the w-concept along the feature asked.
 #    -> How to turn question into a vector:
 #       vectorq = s.analogyspace_norm.weighted_v[question,:]
 #    -> How to calculate projection:

 # 5. Subtract out the projection from the w-concept.
 #    -> Ken is making this be clever for large, dense matrices
 #    -> For now, use a smaller segment of conceptnet
 # 6. Calculate the weight function for the magnitude of the projection, and
 #    weight w-concept vector by the result.
 # Repeat!
	from csc import divisi2

	# ConceptNet: load the English graph and build a sparse matrix from it
	# over its 'nodes' and 'features'.
	cnet_graph = divisi2.load('data:graphs/conceptnet_en.graph')
	A = divisi2.network.sparse_matrix(
	    cnet_graph, 'nodes', 'features', cutoff=5
	)

	# AnalogySpace: run an SVD on the normalized matrix.
	U, S, V = A.normalize_all().svd()
	# `predictions` is the lazy reconstruction from the three factors.
	# The non-lazy form, A_approx = divisi2.dot(U*S, V.T), is the 'real deal'
	# but was noted as very slow if A is big, so it stays disabled.
	predictions = divisi2.reconstruct(U, S, V)

	# Second SVD, this time on U, to find the weighted concept-vector
	# (i.e. the feature) of the most variance.
	# "efspace" stands for "eigenfeaturespace"; to_sparse() is called so that
	# the cleverer SVD math is used.
	efspaceU, efspaceS, efspaceV = (
	    U.normalize_all().to_sparse().svd(1)
	)
	most_variance_feature = efspaceU.T[0]

	# Send the eigenfeature through V*S*U^T, i.e. dot it with all other
	# features, to measure how much each natural-language feature "matches"
	# the eigenfeature.
	closest_features = V.dot(
	    U.T.dot(most_variance_feature).multiply(S)
	)

	# First question to ask: the natural-language feature that lies closest
	# to the direction of most variance.
	first_question = closest_features.top_items(1)[0][0]


	# GAME PLAN:
	#
	# At any stage of the game, we have a matrix of concepts-to-features,
	# with the concept vectors weighted by how much they've been matching
	# at this stage of the game. I call these "w-concepts" for "weighted concepts"

	# ACTUAL DETAILS ARE FROM DIVISI, NOT DIVISI2!
	# 1. Do an SVD of analogyspace over features to find the first eigenfeature
	# - normalized or not?
	# -> In Divisi2, normalize_all method
	# - how can I get at the v's?
	# -> .v, then SVD the way you would conceptnet
	# -> v, not weighted_v, because if weighted with sigma then output re-SVD'd would not necessarily be the same (THIS IS NOT ACTUALLY RELEVANT)
	# - how can I see the axes, in order, of an SVD2DResults?
	# -> .summarize() will print them, or .summarize_axis(0) for the first axis
	# -> Try efspace.u[:, 0].top_items(1)[0][0]
	# -> For opposites, do efspace.u[:,0].top_items(1, largest=False)[0][0]

	# IDEA: flip between top and bottom items, and vary the first "0" to 1, 2, to grab nearby possibilities.
	# - NEEDS ANSWER: how can I get the thing pointing most nearly in the right DIRECTION? Isn't that the one that's most like the eigenfeature? Do I want weighted? Weighted seems dramatically different
	# -> strong, off-angle one has more information about it
	# -> small, on-angle one is less known
	# -> try both, but probably the first one
	# - why are its dimensions 12976 by 121488, not 100 by something? still don't quite understand

	# 2. Ask about the first eigenfeature
	# -> the commands above give questions like ('right', u'HasProperty', u'bad'), which are already in near-perfect format

	# 3. Given a yes/no answer about the eigenfeature asked, choose a weight
	# function which takes a magnitude and returns either a sigmoid or a
	# backwards sigmoid of that magnitude. (T/F -> function)
	# - need SciPy to get the erf function, methinks
	# Idea: "probably" and "probably not" can trigger wider sigmoid functions

	# For each concept:
	# 4. Calculate the projection of the w-concept along the feature asked.
	# -> How to turn question into a vector:
	# vectorq = s.analogyspace_norm.weighted_v[question,:]
	# -> How to calculate projection:

	# 5. Subtract out the projection from the w-concept.
	# -> Ken is making this be clever for large, dense matrices
	# -> For now, use a smaller segment of conceptnet
	# 6. Calculate the weight function for the magnitude of the projection, and
	# weight w-concept vector by the result.
	# Repeat!