Last active
April 2, 2017 21:27
-
-
Save hughdbrown/94016d0f477b855a893514131fc91f99 to your computer and use it in GitHub Desktop.
How correlated is rank of most mobile cities versus rank of most foreign-born cities?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
from re import compile | |
from abc import abstractmethod | |
from scipy.stats import spearmanr, pearsonr, linregress | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# Rows of the mobility table look like "<rank> <city>, <state> <numbers...>":
# capture the city name between the leading rank number and the first comma.
table_re1 = compile(r"""^\d+\s+(?P<city>[^,]+),""")
# Rows of the foreign-born table start with "<city>, <state> ..." and some
# carry a parenthesised qualifier, e.g. "Indianapolis (balance), Indiana":
# capture everything before the first comma or opening parenthesis.  The
# capture can end in a space, which is why callers strip() the result.
table_re2 = compile(r"""^(?P<city>[^,\(]+)""")
class MobileForeignAbstract(object):
    """Base class for comparing city mobility against foreign-born share.

    Subclasses decide how the two raw text tables are parsed (``mobile`` and
    ``foreign``), how the cities present in both tables are turned into
    paired x/y series (``compare``) and which correlation statistic is
    reported (``correlation``).  This base class supplies the intersection
    of city names and the plotting.

    NOTE: the class derives from ``object`` rather than using ``ABCMeta``,
    so ``@abstractmethod`` on its own does not prevent instantiation.  The
    hooks therefore raise ``NotImplementedError`` instead of silently
    returning ``None``.
    """

    def __init__(self, mobile_data, foreign_data):
        """Store the raw text of the mobility and foreign-born tables."""
        self.mobile_cities = mobile_data
        self.foreign_born_cities = foreign_data

    @abstractmethod
    def mobile(self):
        """Parse ``self.mobile_cities`` into a collection keyed by city."""
        raise NotImplementedError

    @abstractmethod
    def foreign(self):
        """Parse ``self.foreign_born_cities`` into a collection keyed by city."""
        raise NotImplementedError

    @abstractmethod
    def compare(self, shared, mobile, foreign):
        """Return paired ``(x, y)`` series for the cities in ``shared``."""
        raise NotImplementedError

    @abstractmethod
    def correlation(self, x, y):
        """Return a single correlation coefficient for ``x`` and ``y``."""
        raise NotImplementedError

    def graph(self, x, y):
        """Show a scatter plot of ``y`` against ``x`` with a fitted line.

        The line is the least-squares fit from ``linregress``; the value of
        ``self.correlation(x, y)`` is written in the top-left corner of the
        data range.  Blocks in ``plt.show()`` until the window is closed.
        """
        fit = linregress(x, y)
        # Evaluate the fitted line for all x values in one vectorized step.
        fitted = fit.slope * np.asarray(x, dtype=float) + fit.intercept

        plt.scatter(x, y)
        plt.plot(x, fitted)

        ax = plt.gca()
        ax.set_xlabel("Mobility")
        ax.set_ylabel("Foreign born")
        ax.set_title("Cities: mobility versus % foreign-born")

        coefficient = self.correlation(x, y)
        ax.text(min(x), max(y), r'correlation {0:.3f}'.format(coefficient))
        plt.show()

    def shared(self, mobile, foreign):
        """Return the set of city names present in both collections.

        Works for any iterables of names; iterating a dict yields its keys,
        so the dict-returning parsers are accepted as well.
        """
        # Find the 42 cities common to the two sets
        # Dropped out:
        # ['Dayton', 'Bridgeport', 'Nashville', 'Providence', 'Grand Rapids',
        #  'Salt Lake City', 'Port St. Lucie', 'Manchester']
        return set(mobile) & set(foreign)
class MobileForeignOrdinal(MobileForeignAbstract):
    """Rank-based comparison using each city's position in its table.

    Cities are compared by where they appear in the mobility table versus
    where they appear in the foreign-born table, and the association is
    measured with Spearman's rank correlation.
    """

    def mobile(self):
        """Return the city names of the mobility table, in table order."""
        return [
            table_re1.match(line).group("city")
            for line in self.mobile_cities.splitlines()
            if line.strip()  # tolerate blank lines in the pasted table
        ]

    def foreign(self):
        """Return the city names of the foreign-born table, in table order."""
        return [
            table_re2.match(line).group("city").strip()
            for line in self.foreign_born_cities.splitlines()
            if line.strip()  # tolerate blank lines in the pasted table
        ]

    def compare(self, shared, mobile, foreign):
        """Pair each shared city's mobility position with its foreign position.

        Both lists are first reduced to the cities in ``shared``; positions
        are indices within those reduced lists.

        Returns:
            Two integer arrays ``(x, y)`` of equal length: ``x[i]`` is the
            position of a city among the shared mobility cities and ``y[i]``
            its position among the shared foreign-born cities.  Both are
            empty when there are no shared cities.
        """
        shared_mobile = [city for city in mobile if city in shared]
        shared_foreign = [city for city in foreign if city in shared]

        # Map every city to the index of its first occurrence; this gives the
        # same answer as list.index() without a linear scan per city.
        foreign_position = {}
        for position, city in enumerate(shared_foreign):
            foreign_position.setdefault(city, position)

        pairs = [
            (position, foreign_position[city])
            for position, city in enumerate(shared_mobile)
        ]
        # reshape keeps the array two-dimensional even when pairs is empty.
        mapped = np.array(pairs, dtype=int).reshape(-1, 2)
        return mapped[:, 0], mapped[:, 1]

    def correlation(self, x, y):
        """Return Spearman's rank correlation coefficient of ``x`` and ``y``."""
        return spearmanr(x, y).correlation
class MobileForeignNumeric(MobileForeignAbstract):
    """Value-based comparison using the numeric columns of the two tables.

    Each city is mapped to a number taken from its table row, and the
    association is measured with Pearson's correlation coefficient.
    """

    def mobile(self):
        """Return ``{city: value}`` parsed from the mobility table.

        The value is the fourth column from the end of each row (46.2 in the
        "Salt Lake City" row).
        """
        # split() with no argument tolerates runs of whitespace and trailing
        # blanks, which split(' ') would turn into extra empty fields.
        return {
            table_re1.match(line).group("city"): float(line.split()[-4])
            for line in self.mobile_cities.splitlines()
            if line.strip()  # tolerate blank lines in the pasted table
        }

    def foreign(self):
        """Return ``{city: value}`` parsed from the foreign-born table.

        The value is the second column from the end of each row (74.4 in the
        "Hialeah" row) -- presumably the percentage foreign-born; confirm
        against the table's source.  If two rows share a city name, the
        later row wins.
        """
        # Whitespace splitting selects the same column whether the pasted
        # table is tab- or space-delimited.
        return {
            table_re2.match(line).group("city").strip(): float(line.split()[-2])
            for line in self.foreign_born_cities.splitlines()
            if line.strip()  # tolerate blank lines in the pasted table
        }

    def compare(self, shared, mobile, foreign):
        """Pair the mobility and foreign-born values of every shared city.

        Returns:
            Two float arrays ``(x, y)`` of equal length holding, for each
            city in ``shared``, its mobility value and its foreign-born
            value.  Both are empty when there are no shared cities.
        """
        # Build a real list: on Python 3, np.array(dict.values()) wraps the
        # view object in a 0-d array and the column slicing below fails.
        pairs = [(mobile[city], foreign[city]) for city in shared]
        # reshape keeps the array two-dimensional even when pairs is empty.
        mapped = np.array(pairs, dtype=float).reshape(-1, 2)
        return mapped[:, 0], mapped[:, 1]

    def correlation(self, x, y):
        """Return Pearson's correlation coefficient of ``x`` and ``y``."""
        return pearsonr(x, y)[0]
# From http://www.rajchetty.com/chettyfiles/mobility_geo.pdf | |
mobile_cities = """1 Salt Lake City, Utah 1,426,729 46.2 10.8 77.3 0.264 | |
2 Pittsburgh, Pennsylvania 2,561,364 45.2 9.5 74.9 0.359 | |
3 San Jose, California 2,393,183 44.7 12.9 73.5 0.235 | |
4 Boston, Massachusetts 4,974,945 44.6 10.5 73.7 0.322 | |
5 San Francisco, California 4,642,561 44.4 12.2 72.5 0.250 | |
6 San Diego, California 2,813,833 44.3 10.4 74.3 0.237 | |
7 Manchester, New Hampshire 1,193,391 44.2 10.0 75.0 0.296 | |
8 Minneapolis, Minnesota 2,904,389 44.2 8.5 75.2 0.338 | |
9 Newark, New Jersey 5,822,286 44.1 10.2 73.7 0.350 | |
10 New York, New York 11,781,395 43.8 10.5 72.2 0.330 | |
11 Los Angeles, California 16,393,360 43.4 9.6 73.8 0.231 | |
12 Providence, Rhode Island 1,582,997 43.4 8.2 73.6 0.333 | |
13 Washington, DC 4,632,415 43.2 11.0 72.2 0.330 | |
14 Seattle, Washington 3,775,744 43.2 10.9 72.0 0.273 | |
15 Houston, Texas 4,504,013 42.8 9.3 74.7 0.325 | |
16 Sacramento, California 2,570,609 42.7 9.7 71.3 0.257 | |
17 Bridgeport, Connecticut 3,405,565 42.4 7.9 72.4 0.359 | |
18 Fort Worth, Texas 1,804,370 42.3 9.1 73.6 0.320 | |
19 Denver, Colorado 2,449,044 42.2 8.7 73.3 0.294 | |
20 Buffalo, New York 2,369,699 42.0 6.7 73.1 0.368 | |
21 Miami, Florida 3,955,969 41.5 7.3 76.3 0.267 | |
22 Fresno, California 1,419,998 41.3 7.5 71.3 0.295 | |
23 Portland, Oregon 1,842,889 41.3 9.3 70.5 0.277 | |
24 San Antonio, Texas 1,724,863 41.1 6.4 74.3 0.320 | |
25 Philadelphia, Pennsylvania 5,602,247 40.8 7.4 69.6 0.393 | |
26 Austin, Texas 1,298,076 40.4 6.9 71.9 0.323 | |
27 Dallas, Texas 3,405,666 40.4 7.1 72.6 0.347 | |
28 Phoenix, Arizona 3,303,211 40.3 7.5 70.6 0.294 | |
29 Grand Rapids, Michigan 1,286,045 40.1 6.4 71.3 0.378 | |
30 Kansas City, Missouri 1,762,873 40.1 7.0 70.4 0.365 | |
31 Las Vegas, Nevada 1,568,418 40.0 8.0 71.1 0.259 | |
32 Chicago, Illinois 8,183,799 39.4 6.5 70.8 0.393 | |
33 Milwaukee, Wisconsin 1,660,659 39.3 4.5 70.3 0.424 | |
34 Tampa, Florida 2,395,997 39.1 6.0 71.3 0.335 | |
35 Orlando, Florida 1,697,906 39.1 5.8 71.5 0.326 | |
36 Port St. Lucie, Florida 1,533,306 39.0 6.2 71.2 0.303 | |
37 Baltimore, Maryland 2,512,431 38.8 6.4 67.7 0.412 | |
38 St. Louis, Missouri 2,325,609 38.4 5.1 69.0 0.413 | |
39 Dayton, Ohio 1,179,009 38.3 4.9 68.2 0.397 | |
40 Cleveland, Ohio 2,661,167 38.2 5.1 68.7 0.405 | |
41 Nashville, Tennessee 1,246,338 38.2 5.7 67.9 0.357 | |
42 New Orleans, Louisiana 1,381,652 38.2 5.1 69.5 0.397 | |
43 Cincinnati, Ohio 1,954,800 37.9 5.1 66.4 0.429 | |
44 Columbus, Ohio 1,663,807 37.7 4.9 67.1 0.406 | |
45 Jacksonville, Florida 1,176,696 37.5 4.9 68.9 0.361 | |
46 Detroit, Michigan 5,327,827 37.3 5.5 68.5 0.358 | |
47 Indianapolis, Indiana 1,507,346 37.2 4.9 67.5 0.398 | |
48 Raleigh, North Carolina 1,412,127 36.9 5.0 67.3 0.389 | |
49 Atlanta, Georgia 3,798,017 36.0 4.5 69.4 0.366 | |
50 Charlotte, North Carolina 1,423,942 35.8 4.4 67.0 0.397""" | |
# From https://en.wikipedia.org/wiki/List_of_United_States_cities_by_foreign-born_population | |
foreign_born_cities = """Hialeah, Florida 218,901 162,951 74.4 1 | |
Miami, Florida 433,143 244,352 56.4 2 | |
Santa Ana, California 340,378 166,960 49.1 3 | |
Fremont, California 205,521 88,211 42.9 4 | |
Los Angeles, California 3,831,880 1,521,119 39.7 5 | |
San Jose, California 964,679 367,711 38.1 6 | |
Anaheim, California 337,899 127,111 37.6 7 | |
New York, New York 8,391,881 2,996,580 35.7 8 | |
Irvine, California 209,707 74,225 35.4 9 | |
Jersey City, New Jersey 242,513 84,583 34.9 10 | |
Irving, Texas 205,549 70,263 34.2 11 | |
San Francisco, California 815,358 278,369 34.1 12 | |
Chula Vista, California 223,746 71,797 32.1 13 | |
Yonkers, New York 201,073 62,818 31.2 14 | |
Oakland, California 409,151 116,794 28.5 15 | |
Houston, Texas 2,260,918 644,167 28.5 16 | |
Laredo, Texas 226,419 62,653 27.7 17 | |
Stockton, California 287,584 79,511 27.6 18 | |
Garland, Texas 222,013 60,187 27.1 19 | |
Long Beach, California 462,594 124,340 26.9 20 | |
Newark, New Jersey 278,157 74,762 26.9 21 | |
Honolulu, Hawaii (CDP) 374,658 96,752 25.8 22 | |
Paradise, Nevada (CDP) 202,987 51,090 25.2 23 | |
Boston, Massachusetts 645,187 161,740 25.1 24 | |
San Diego, California 1,306,228 325,819 24.9 25 | |
North Las Vegas, Nevada 224,416 55,805 24.9 26 | |
Dallas, Texas 1,299,590 322,072 24.8 27 | |
El Paso, Texas 620,440 151,295 24.4 28 | |
Riverside, California 297,863 69,792 23.4 29 | |
Plano, Texas 273,381 62,217 22.8 30 | |
Aurora, Colorado 323,288 70,747 21.9 31 | |
Sacramento, California 466,685 102,076 21.9 32 | |
Arlington, Virginia (CDP) 217,483 47,379 21.8 33 | |
Phoenix, Arizona 1,593,660 346,430 21.7 34 | |
Las Vegas, Nevada 567,610 119,437 21 35 | |
Chicago, Illinois 2,850,502 588,480 20.6 36 | |
Fresno, California 479,911 97,316 20.3 37 | |
Austin, Texas 790,593 159,353 20.2 38 | |
Arlington, Texas 380,072 76,440 20.1 39 | |
Fort Worth, Texas 731,588 131,197 17.9 40 | |
Reno, Nevada 219,649 37,964 17.3 41 | |
Seattle, Washington 616,669 105,154 17.1 42 | |
Tucson, Arizona 543,907 90,794 16.7 43 | |
Orlando, Florida 235,876 39,210 16.6 44 | |
Bakersfield, California 324,479 53,480 16.5 45 | |
Denver, Colorado 610,345 95,585 15.7 46 | |
Minneapolis, Minnesota 385,384 59,093 15.3 47 | |
St. Paul, Minnesota 281,244 42,669 15.2 48 | |
Modesto, California 202,740 30,464 15 49 | |
Tampa, Florida 343,879 50,377 14.6 50 | |
Glendale, Arizona 253,210 36,197 14.3 51 | |
Charlotte, North Carolina 704,417 96,734 13.7 52 | |
San Antonio, Texas 1,373,677 180,895 13.2 53 | |
Durham, North Carolina 229,147 30,174 13.2 54 | |
Raleigh, North Carolina 405,197 53,154 13.1 55 | |
Scottsdale, Arizona 237,834 30,074 12.6 56 | |
Chandler, Arizona 249,515 31,551 12.6 57 | |
Portland, Oregon 566,606 71,380 12.6 58 | |
Mesa, Arizona 467,178 56,895 12.2 59 | |
Washington, D.C. 599,657 72,110 12 60 | |
Henderson, Nevada 256,424 30,462 11.9 61 | |
Nashville-Davidson, Tennessee 605,466 70,404 11.6 62 | |
Philadelphia, Pennsylvania 1,547,297 179,444 11.6 63 | |
Winston-Salem, North Carolina 229,826 26,064 11.3 64 | |
Oklahoma City, Oklahoma 560,226 63,199 11.3 65 | |
Albuquerque, New Mexico 529,216 57,298 10.8 66 | |
Columbus, Ohio 773,021 83,091 10.7 67 | |
Des Moines, Iowa 200,569 21,050 10.5 68 | |
Greensboro, North Carolina 255,141 26,410 10.4 69 | |
St. Petersburg, Florida 244,318 25,094 10.3 70 | |
Milwaukee, Wisconsin 605,027 59,785 9.9 71 | |
Anchorage, Alaska (municipality) 286,174 27,107 9.5 72 | |
Omaha, Nebraska 454,714 43,029 9.5 73 | |
Tulsa, Oklahoma 389,369 36,514 9.4 74 | |
Gilbert, Arizona (town) 222,092 20,606 9.3 75 | |
Jacksonville, Florida 813,518 73,992 9.1 76 | |
Wichita, Kansas 372,194 33,608 9 77 | |
Boise City, Idaho 205,698 18,221 8.9 78 | |
Madison, Wisconsin 235,410 20,736 8.8 79 | |
Virginia Beach, Virginia 433,575 36,727 8.5 80 | |
Corpus Christi, Texas 287,231 23,315 8.1 81 | |
Lexington-Fayette, Kentucky (urban county) 296,545 23,967 8.1 82 | |
Indianapolis (balance), Indiana 807,640 63,241 7.8 83 | |
Colorado Springs, Colorado 399,803 31,266 7.8 84 | |
Fort Wayne, Indiana 251,825 19,055 7.6 85 | |
Atlanta, Georgia 540,932 39,733 7.3 86 | |
Buffalo, New York 270,221 19,618 7.3 87 | |
Kansas City, Missouri 482,228 34,085 7.1 88 | |
Lincoln, Nebraska 254,008 17,128 6.7 89 | |
Detroit, Michigan 910,848 60,170 6.6 90 | |
Rochester, New York 207,291 13,454 6.5 91 | |
Baltimore, Maryland 637,418 41,343 6.5 92 | |
Pittsburgh, Pennsylvania 311,640 19,993 6.4 93 | |
Norfolk, Virginia 233,333 14,562 6.2 94 | |
Richmond, Virginia 204,451 12,301 6 95 | |
St. Louis, Missouri 356,587 21,177 5.9 96 | |
Spokane, Washington 203,268 11,562 5.7 97 | |
Louisville, Kentucky ("balance") 566,492 31,315 5.5 98 | |
Memphis, Tennessee 676,646 36,519 5.4 99 | |
New Orleans, Louisiana 354,850 18,968 5.3 100 | |
Lubbock, Texas 225,865 10,709 4.7 101 | |
Akron, Ohio 207,208 9,526 4.6 102 | |
Baton Rouge, Louisiana 225,388 10,235 4.5 103 | |
Cleveland, Ohio 431,369 19,495 4.5 104 | |
Chesapeake, Virginia 222,455 9,838 4.4 105 | |
Birmingham, Alabama 231,824 8,883 3.8 106 | |
Cincinnati, Ohio 333,013 11,471 3.4 107 | |
Toledo, Ohio 316,164 8,793 2.8 108 | |
Montgomery, Alabama 201,465 5,454 2.7 109""" | |
def main(model):
    """Run one analysis end to end and display its plot.

    ``model`` is called with the module-level ``mobile_cities`` and
    ``foreign_born_cities`` tables; the resulting object parses both, finds
    the cities they share, builds the paired series and graphs them.
    """
    analysis = model(mobile_cities, foreign_born_cities)
    mobile_series = analysis.mobile()
    foreign_series = analysis.foreign()
    common = analysis.shared(mobile_series, foreign_series)
    xs, ys = analysis.compare(common, mobile_series, foreign_series)
    analysis.graph(xs, ys)


if __name__ == '__main__':
    # Pass MobileForeignOrdinal instead to compare table positions
    # rather than the numeric values.
    main(MobileForeignNumeric)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Pearson correlation is about 55%.