Created
July 1, 2016 18:59
-
-
Save edglazer/a335e38dcfc4d8823b6cd6ee70743eae to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Timer unit: 1e-06 s | |
Total time: 5619.97 s | |
File: tribe/extract.py | |
Function: extract_graph at line 105 | |
Line # Hits Time Per Hit % Time Line Contents | |
============================================================== | |
105 @profile | |
106 def extract_graph(self): | |
107 """ | |
108 Extracts a Graph where the nodes are EmailAddress | |
109 """ | |
110 | |
111 1 3 3.0 0.0 def relationships(email): | |
112 """ | |
113 Inner function that constructs email relationships | |
114 """ | |
115 people = [email.sender,] | |
116 people.extend(email.recipients) | |
117 people.extend(email.copied) | |
118 | |
119 people = filter(lambda p: p is not None, people) # Filter out any None addresses | |
120 people = set(addr.email for addr in people if addr.email) # Obtain only unique people | |
121 people = sorted(people) # Sort lexicographically for combinations | |
122 | |
123 for combo in combinations(people, 2): | |
124 yield combo | |
125 | |
126 | |
127 # Keep track of all the email to email links | |
128 1 8 8.0 0.0 links = FreqDist() | |
129 | |
130 # Iterate over all the extracted emails | |
131 # Catch exceptions, if any, and move forward | |
132 # NOTE: This will allow the progress bar to work | |
133 # NOTE: This will build the graph data structure in memory | |
134 114956 1084094085 9430.5 19.3 for email in self.extract(): | |
135 114955 117687 1.0 0.0 try: | |
136 4870025 7047332 1.4 0.1 for combo in relationships(email): | |
137 4755070 6955436 1.5 0.1 links[combo] += 1 | |
138 except Exception as e: | |
139 self.errors[e] += 1 | |
140 continue | |
141 | |
142 # Construct the networkx graph and add edges | |
143 1 58 58.0 0.0 G = nx.Graph(name="Email Network", mbox=self.path, extracted=strfnow()) | |
144 90728 281650 3.1 0.0 for link in links.keys(): | |
145 90728 4521475670 49835.5 80.5 G.add_edge(*link, weight=links.freq(link)) # took out Changed per suggestion of @bbengfort, as calculation may be too strenuous
146 # G.add_edge(*link, weight=links[link]) | |
147 | |
148 # Return the generated graph | |
149 return G |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment