Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
Last active April 2, 2017 21:27
Show Gist options
  • Save hughdbrown/94016d0f477b855a893514131fc91f99 to your computer and use it in GitHub Desktop.
Save hughdbrown/94016d0f477b855a893514131fc91f99 to your computer and use it in GitHub Desktop.
How correlated is the rank of the most mobile cities with the rank of the cities with the most foreign-born residents?
#!/usr/bin/env python
from __future__ import print_function

from abc import ABCMeta
from abc import abstractmethod
from re import compile

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr, pearsonr, linregress
# Mobility table rows start with a numeric rank; capture the text between the
# rank (plus whitespace) and the first comma as the city name.
table_re1 = compile(r"^\d+\s+(?P<city>[^,]+),")

# Foreign-born table rows start with the city name; capture everything up to
# the first comma or opening parenthesis.  The capture can end in whitespace
# (e.g. "Indianapolis (balance)"), so callers strip it.
table_re2 = compile(r"^(?P<city>[^,\(]+)")
# ABCMeta(name, bases, namespace) builds an intermediate base class whose
# metaclass is ABCMeta.  This spelling works on both Python 2 and Python 3 and
# is what makes the @abstractmethod markers below actually enforced.
class MobileForeignAbstract(ABCMeta("_MobileForeignBase", (object,), {})):
    """Template for comparing city mobility data with foreign-born data.

    Subclasses decide how each raw table is parsed (`mobile`, `foreign`),
    how the cities common to both are turned into paired x/y series
    (`compare`), and which correlation statistic is reported
    (`correlation`).  This base class stores the raw text and provides the
    shared plotting and set-intersection steps.

    The class cannot be instantiated directly; a subclass must implement
    all four abstract methods.
    """

    def __init__(self, mobile_data, foreign_data):
        """Store the two raw tables (multi-line strings) for later parsing."""
        self.mobile_cities, self.foreign_born_cities = mobile_data, foreign_data

    @abstractmethod
    def mobile(self):
        """Parse `self.mobile_cities` into a collection keyed by city name."""

    @abstractmethod
    def foreign(self):
        """Parse `self.foreign_born_cities` into a collection keyed by city name."""

    @abstractmethod
    def compare(self, shared, mobile, foreign):
        """Return a pair `(x, y)` of equal-length sequences for the shared cities."""

    @abstractmethod
    def correlation(self, x, y):
        """Return a single correlation coefficient for the paired series."""

    def graph(self, x, y):
        """Show a scatter plot of `y` against `x` with a least-squares line.

        The coefficient returned by `self.correlation(x, y)` is written at
        the top-left corner of the data (smallest x, largest y).  Blocks
        until the plot window is closed, as `plt.show()` does.
        """
        reg = linregress(x, y)
        # Evaluate the fitted line at every x in one vectorized expression.
        fitted = reg.slope * np.asarray(x) + reg.intercept
        plt.scatter(x, y)
        plt.plot(x, fitted)
        ax = plt.gca()
        ax.set_xlabel("Mobility")
        ax.set_ylabel("Foreign born")
        ax.set_title("Cities: mobility versus % foreign-born")
        regression = self.correlation(x, y)
        ax.text(min(x), max(y), r'correlation {0:.3f}'.format(regression))
        plt.show()

    def shared(self, mobile, foreign):
        """Return the set of city names present in both collections.

        Works for any iterables of names; for dicts the keys are used.
        """
        # With the tables bundled in this file, 42 cities are common to both.
        # Dropped out:
        # ['Dayton', 'Bridgeport', 'Nashville', 'Providence', 'Grand Rapids',
        # 'Salt Lake City', 'Port St. Lucie', 'Manchester']
        return set(mobile) & set(foreign)
class MobileForeignOrdinal(MobileForeignAbstract):
    """Compare cities by their rank position in each table.

    Each table is reduced to its list of city names in file order; the
    position of a city among the shared cities is its rank.  The reported
    statistic is Spearman's rank correlation.
    """

    def mobile(self):
        """Return the city names from the mobility table, in table order."""
        return [
            table_re1.match(line).groupdict()["city"]
            for line in self.mobile_cities.splitlines()
        ]

    def foreign(self):
        """Return the city names from the foreign-born table, in table order."""
        return [
            table_re2.match(line).groupdict()["city"].strip()
            for line in self.foreign_born_cities.splitlines()
        ]

    def compare(self, shared, mobile, foreign):
        """Pair each shared city's mobility rank with its foreign-born rank.

        Ranks are 0-based positions counted among the shared cities only,
        in the order the cities appear in `mobile` and `foreign`.

        Returns:
            Two integer arrays `(mobile_ranks, foreign_ranks)` of equal
            length, ordered by mobility rank.  Both are empty when there are
            no shared cities.

        Raises:
            KeyError: if a city in `shared` appears in `mobile` but not in
                `foreign`.
        """
        shared_mobile = [city for city in mobile if city in shared]
        # Build the rank lookup once instead of calling list.index per city.
        # setdefault keeps the first position, matching list.index semantics
        # should a name ever repeat.
        foreign_rank = {}
        shared_foreign = (city for city in foreign if city in shared)
        for rank, city in enumerate(shared_foreign):
            foreign_rank.setdefault(city, rank)
        mapped = [
            (i, foreign_rank[city])
            for i, city in enumerate(shared_mobile)
        ]
        # reshape keeps the result two-dimensional even when it is empty, so
        # the column slices below never fail.
        mapped = np.array(mapped, dtype=int).reshape(-1, 2)
        return mapped[:, 0], mapped[:, 1]

    def correlation(self, x, y):
        """Return Spearman's rank correlation coefficient of `x` and `y`."""
        return spearmanr(x, y).correlation
class MobileForeignNumeric(MobileForeignAbstract):
    """Compare cities by the numeric values recorded in each table.

    Each table is reduced to a `{city: value}` dict and the reported
    statistic is the Pearson correlation of the paired values.
    """

    def mobile(self):
        """Map each city in the mobility table to its mobility value.

        The value is the fourth whitespace-separated field from the end of
        the line.
        """
        return {
            table_re1.match(line).groupdict()["city"]: float(line.split()[-4])
            for line in self.mobile_cities.splitlines()
        }

    def foreign(self):
        """Map each city in the foreign-born table to its percentage value.

        The value is the second-to-last field of the line.  Fields are split
        on any whitespace, so tab-separated and space-separated tables both
        parse.  If a city name occurs more than once, the last row wins.
        """
        return {
            table_re2.match(line).groupdict()["city"].strip(): float(line.split()[-2])
            for line in self.foreign_born_cities.splitlines()
        }

    def compare(self, shared, mobile, foreign):
        """Pair the mobility and foreign-born values of every shared city.

        Returns:
            Two float arrays `(mobile_values, foreign_values)` of equal
            length, ordered by city name so repeated runs give identical
            output.  Both are empty when `shared` is empty.

        Raises:
            KeyError: if a city in `shared` is missing from either dict.
        """
        # A list (not a dict view) is required: on Python 3,
        # np.array(dict.values()) produces a 0-d object array.
        pairs = [(mobile[city], foreign[city]) for city in sorted(shared)]
        # reshape keeps the result two-dimensional even when it is empty.
        mapped = np.array(pairs, dtype=float).reshape(-1, 2)
        return mapped[:, 0], mapped[:, 1]

    def correlation(self, x, y):
        """Return the Pearson correlation coefficient of `x` and `y`."""
        return pearsonr(x, y)[0]
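# Mobility table, from http://www.rajchetty.com/chettyfiles/mobility_geo.pdf
# One city per line: rank, "City, State", population, then four numeric
# columns.  The parsers read the city name and the first of those four
# columns (the fourth field from the end), which is plotted as "Mobility".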
mobile_cities = """1 Salt Lake City, Utah 1,426,729 46.2 10.8 77.3 0.264
2 Pittsburgh, Pennsylvania 2,561,364 45.2 9.5 74.9 0.359
3 San Jose, California 2,393,183 44.7 12.9 73.5 0.235
4 Boston, Massachusetts 4,974,945 44.6 10.5 73.7 0.322
5 San Francisco, California 4,642,561 44.4 12.2 72.5 0.250
6 San Diego, California 2,813,833 44.3 10.4 74.3 0.237
7 Manchester, New Hampshire 1,193,391 44.2 10.0 75.0 0.296
8 Minneapolis, Minnesota 2,904,389 44.2 8.5 75.2 0.338
9 Newark, New Jersey 5,822,286 44.1 10.2 73.7 0.350
10 New York, New York 11,781,395 43.8 10.5 72.2 0.330
11 Los Angeles, California 16,393,360 43.4 9.6 73.8 0.231
12 Providence, Rhode Island 1,582,997 43.4 8.2 73.6 0.333
13 Washington, DC 4,632,415 43.2 11.0 72.2 0.330
14 Seattle, Washington 3,775,744 43.2 10.9 72.0 0.273
15 Houston, Texas 4,504,013 42.8 9.3 74.7 0.325
16 Sacramento, California 2,570,609 42.7 9.7 71.3 0.257
17 Bridgeport, Connecticut 3,405,565 42.4 7.9 72.4 0.359
18 Fort Worth, Texas 1,804,370 42.3 9.1 73.6 0.320
19 Denver, Colorado 2,449,044 42.2 8.7 73.3 0.294
20 Buffalo, New York 2,369,699 42.0 6.7 73.1 0.368
21 Miami, Florida 3,955,969 41.5 7.3 76.3 0.267
22 Fresno, California 1,419,998 41.3 7.5 71.3 0.295
23 Portland, Oregon 1,842,889 41.3 9.3 70.5 0.277
24 San Antonio, Texas 1,724,863 41.1 6.4 74.3 0.320
25 Philadelphia, Pennsylvania 5,602,247 40.8 7.4 69.6 0.393
26 Austin, Texas 1,298,076 40.4 6.9 71.9 0.323
27 Dallas, Texas 3,405,666 40.4 7.1 72.6 0.347
28 Phoenix, Arizona 3,303,211 40.3 7.5 70.6 0.294
29 Grand Rapids, Michigan 1,286,045 40.1 6.4 71.3 0.378
30 Kansas City, Missouri 1,762,873 40.1 7.0 70.4 0.365
31 Las Vegas, Nevada 1,568,418 40.0 8.0 71.1 0.259
32 Chicago, Illinois 8,183,799 39.4 6.5 70.8 0.393
33 Milwaukee, Wisconsin 1,660,659 39.3 4.5 70.3 0.424
34 Tampa, Florida 2,395,997 39.1 6.0 71.3 0.335
35 Orlando, Florida 1,697,906 39.1 5.8 71.5 0.326
36 Port St. Lucie, Florida 1,533,306 39.0 6.2 71.2 0.303
37 Baltimore, Maryland 2,512,431 38.8 6.4 67.7 0.412
38 St. Louis, Missouri 2,325,609 38.4 5.1 69.0 0.413
39 Dayton, Ohio 1,179,009 38.3 4.9 68.2 0.397
40 Cleveland, Ohio 2,661,167 38.2 5.1 68.7 0.405
41 Nashville, Tennessee 1,246,338 38.2 5.7 67.9 0.357
42 New Orleans, Louisiana 1,381,652 38.2 5.1 69.5 0.397
43 Cincinnati, Ohio 1,954,800 37.9 5.1 66.4 0.429
44 Columbus, Ohio 1,663,807 37.7 4.9 67.1 0.406
45 Jacksonville, Florida 1,176,696 37.5 4.9 68.9 0.361
46 Detroit, Michigan 5,327,827 37.3 5.5 68.5 0.358
47 Indianapolis, Indiana 1,507,346 37.2 4.9 67.5 0.398
48 Raleigh, North Carolina 1,412,127 36.9 5.0 67.3 0.389
49 Atlanta, Georgia 3,798,017 36.0 4.5 69.4 0.366
50 Charlotte, North Carolina 1,423,942 35.8 4.4 67.0 0.397"""
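# Foreign-born table, from
# https://en.wikipedia.org/wiki/List_of_United_States_cities_by_foreign-born_population
# One city per line: "City, State" (optionally followed by a parenthesised
# qualifier), total population, foreign-born population, percent foreign-born,
# rank.  The parsers read the city name and the percentage (the second-to-last
# field).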
foreign_born_cities = """Hialeah, Florida 218,901 162,951 74.4 1
Miami, Florida 433,143 244,352 56.4 2
Santa Ana, California 340,378 166,960 49.1 3
Fremont, California 205,521 88,211 42.9 4
Los Angeles, California 3,831,880 1,521,119 39.7 5
San Jose, California 964,679 367,711 38.1 6
Anaheim, California 337,899 127,111 37.6 7
New York, New York 8,391,881 2,996,580 35.7 8
Irvine, California 209,707 74,225 35.4 9
Jersey City, New Jersey 242,513 84,583 34.9 10
Irving, Texas 205,549 70,263 34.2 11
San Francisco, California 815,358 278,369 34.1 12
Chula Vista, California 223,746 71,797 32.1 13
Yonkers, New York 201,073 62,818 31.2 14
Oakland, California 409,151 116,794 28.5 15
Houston, Texas 2,260,918 644,167 28.5 16
Laredo, Texas 226,419 62,653 27.7 17
Stockton, California 287,584 79,511 27.6 18
Garland, Texas 222,013 60,187 27.1 19
Long Beach, California 462,594 124,340 26.9 20
Newark, New Jersey 278,157 74,762 26.9 21
Honolulu, Hawaii (CDP) 374,658 96,752 25.8 22
Paradise, Nevada (CDP) 202,987 51,090 25.2 23
Boston, Massachusetts 645,187 161,740 25.1 24
San Diego, California 1,306,228 325,819 24.9 25
North Las Vegas, Nevada 224,416 55,805 24.9 26
Dallas, Texas 1,299,590 322,072 24.8 27
El Paso, Texas 620,440 151,295 24.4 28
Riverside, California 297,863 69,792 23.4 29
Plano, Texas 273,381 62,217 22.8 30
Aurora, Colorado 323,288 70,747 21.9 31
Sacramento, California 466,685 102,076 21.9 32
Arlington, Virginia (CDP) 217,483 47,379 21.8 33
Phoenix, Arizona 1,593,660 346,430 21.7 34
Las Vegas, Nevada 567,610 119,437 21 35
Chicago, Illinois 2,850,502 588,480 20.6 36
Fresno, California 479,911 97,316 20.3 37
Austin, Texas 790,593 159,353 20.2 38
Arlington, Texas 380,072 76,440 20.1 39
Fort Worth, Texas 731,588 131,197 17.9 40
Reno, Nevada 219,649 37,964 17.3 41
Seattle, Washington 616,669 105,154 17.1 42
Tucson, Arizona 543,907 90,794 16.7 43
Orlando, Florida 235,876 39,210 16.6 44
Bakersfield, California 324,479 53,480 16.5 45
Denver, Colorado 610,345 95,585 15.7 46
Minneapolis, Minnesota 385,384 59,093 15.3 47
St. Paul, Minnesota 281,244 42,669 15.2 48
Modesto, California 202,740 30,464 15 49
Tampa, Florida 343,879 50,377 14.6 50
Glendale, Arizona 253,210 36,197 14.3 51
Charlotte, North Carolina 704,417 96,734 13.7 52
San Antonio, Texas 1,373,677 180,895 13.2 53
Durham, North Carolina 229,147 30,174 13.2 54
Raleigh, North Carolina 405,197 53,154 13.1 55
Scottsdale, Arizona 237,834 30,074 12.6 56
Chandler, Arizona 249,515 31,551 12.6 57
Portland, Oregon 566,606 71,380 12.6 58
Mesa, Arizona 467,178 56,895 12.2 59
Washington, D.C. 599,657 72,110 12 60
Henderson, Nevada 256,424 30,462 11.9 61
Nashville-Davidson, Tennessee 605,466 70,404 11.6 62
Philadelphia, Pennsylvania 1,547,297 179,444 11.6 63
Winston-Salem, North Carolina 229,826 26,064 11.3 64
Oklahoma City, Oklahoma 560,226 63,199 11.3 65
Albuquerque, New Mexico 529,216 57,298 10.8 66
Columbus, Ohio 773,021 83,091 10.7 67
Des Moines, Iowa 200,569 21,050 10.5 68
Greensboro, North Carolina 255,141 26,410 10.4 69
St. Petersburg, Florida 244,318 25,094 10.3 70
Milwaukee, Wisconsin 605,027 59,785 9.9 71
Anchorage, Alaska (municipality) 286,174 27,107 9.5 72
Omaha, Nebraska 454,714 43,029 9.5 73
Tulsa, Oklahoma 389,369 36,514 9.4 74
Gilbert, Arizona (town) 222,092 20,606 9.3 75
Jacksonville, Florida 813,518 73,992 9.1 76
Wichita, Kansas 372,194 33,608 9 77
Boise City, Idaho 205,698 18,221 8.9 78
Madison, Wisconsin 235,410 20,736 8.8 79
Virginia Beach, Virginia 433,575 36,727 8.5 80
Corpus Christi, Texas 287,231 23,315 8.1 81
Lexington-Fayette, Kentucky (urban county) 296,545 23,967 8.1 82
Indianapolis (balance), Indiana 807,640 63,241 7.8 83
Colorado Springs, Colorado 399,803 31,266 7.8 84
Fort Wayne, Indiana 251,825 19,055 7.6 85
Atlanta, Georgia 540,932 39,733 7.3 86
Buffalo, New York 270,221 19,618 7.3 87
Kansas City, Missouri 482,228 34,085 7.1 88
Lincoln, Nebraska 254,008 17,128 6.7 89
Detroit, Michigan 910,848 60,170 6.6 90
Rochester, New York 207,291 13,454 6.5 91
Baltimore, Maryland 637,418 41,343 6.5 92
Pittsburgh, Pennsylvania 311,640 19,993 6.4 93
Norfolk, Virginia 233,333 14,562 6.2 94
Richmond, Virginia 204,451 12,301 6 95
St. Louis, Missouri 356,587 21,177 5.9 96
Spokane, Washington 203,268 11,562 5.7 97
Louisville, Kentucky ("balance") 566,492 31,315 5.5 98
Memphis, Tennessee 676,646 36,519 5.4 99
New Orleans, Louisiana 354,850 18,968 5.3 100
Lubbock, Texas 225,865 10,709 4.7 101
Akron, Ohio 207,208 9,526 4.6 102
Baton Rouge, Louisiana 225,388 10,235 4.5 103
Cleveland, Ohio 431,369 19,495 4.5 104
Chesapeake, Virginia 222,455 9,838 4.4 105
Birmingham, Alabama 231,824 8,883 3.8 106
Cincinnati, Ohio 333,013 11,471 3.4 107
Toledo, Ohio 316,164 8,793 2.8 108
Montgomery, Alabama 201,465 5,454 2.7 109"""
def main(model):
    """Run one complete analysis with the given model class and plot it.

    `model` is called with the two module-level tables and must return an
    object providing `mobile`, `foreign`, `shared`, `compare` and `graph`.
    """
    analysis = model(mobile_cities, foreign_born_cities)
    # Parse both tables, mobility first.
    mobile_data = analysis.mobile()
    foreign_data = analysis.foreign()
    # Restrict to the cities present in both, then build the paired series.
    common = analysis.shared(mobile_data, foreign_data)
    xs, ys = analysis.compare(common, mobile_data, foreign_data)
    analysis.graph(xs, ys)
if __name__ == "__main__":
    # Plots the raw values with a Pearson correlation.  Pass
    # MobileForeignOrdinal instead to plot rank positions with a Spearman
    # correlation.
    main(MobileForeignNumeric)
@hughdbrown
Copy link
Author

The Pearson correlation coefficient is about 0.55.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment