Created
September 27, 2018 07:32
-
-
Save concerned3rdparty/b7447fda2bf8f1468f95377ad17d9b02 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
I don't use Spark much, but this problem seemed to fit the Spark GraphX API. | |
I tried to find subgraphs (connected components), but the GraphX API could not find any separate subgraph since there are no isolated subgraphs. | |
I also tried to find strongly connected components, but that just returned the whole graph as a single component. | |
I used the graph's PageRank method to rank vertices based on their connections and listed the top 10 ranked vertices. If there had been subgraphs, this code would compute | |
each subgraph's PageRank individually and list the top 10 ranked vertices of each subgraph. | |
Top 10 ranks for the whole graph : | |
vertex id 298 rank 16.08652645559089 | |
vertex id 389 rank 12.098406805535728 | |
vertex id 304 rank 12.087497056846983 | |
vertex id 736 rank 11.754874286531196 | |
vertex id 93 rank 9.987520674262788 | |
vertex id 359 rank 9.612964315366263 | |
vertex id 594 rank 8.581300969025714 | |
vertex id 205 rank 8.329897239219575 | |
vertex id 156 rank 8.284141266051707 | |
vertex id 561 rank 7.793492685726111 | |
The most influential people are the highest-ranked ones. | |
Since these people (also called hubs) have the most connections, targeting ads at them would be more efficient. | |
These people have the biggest network effect. Ads are like gossip, and gossip propagates faster from hubs. | |
whole output from the code above : | |
edges count 9076 | |
vertices count 2892 | |
subGraphId 1 vrtcs 1084,667,1053,1894,2493,1325,140,204,956,291,1,2334,755,1813,2335,2506,1393,450,2021,160,1780,1596,2797,2346,2117,355,1500,347,548,2351,2541,1732,2602,2325,1550,528,1716,453,1310,1202,2718,2192,1031,196,2706,1377,1005,2511,1144,1363,1670,2657,2180,266,1344,2235,2239,2618,2787,83,988,735,1567,1279,2790,1468,1678,836,2439,1800,2744,1656,95,1641,379,2251,436,959,2366,259,759,1977,1906,2070,961,2588,1035,1044,1674,2867,734,2398,491,4,2205,2555,2875,417,1300,1042,359,685,1947,152,1431,1187,406,1883,2175,888,2819,874,483,1013,2591,162,2017,2200,2724,120,2613,2837,2581,2352,2281,2332,1369,601,1623,2361,1710,2653,2059,1688,1755,1750,1228,123,306,2080,282,1132,360,177,52,2507,1608,2641,2518,1481,2132,720,770,2824,219,2829,362,862,305,1066,1417,671,1290,1981,966,694,280,587,922,1133,117,2297,446,1795,245,1205,1730,163,2321,1117,2151,2497,209,348,1123,2079,1911,1545,1933,2054,2412,635,171,2005,2030,1268,1418,2527,679,1483,1651,1207,543,288,2629,2452,981,299,1769,539,332,1865,2586,1139,2425,1092,1809,2644,2696,1988,1070,1294,2194,1265,532,2672,2053,118,1681,897,858,1196,1959,2749,1365,738,1216,1930,1622,2818,2561,1919,2113,28,1935,2245,2809,958,633,2312,1870,106,2689,2137,435,274,1293,2710,699,2008,1459,1288,1642,891,2679,2889,1079,1209,1095,2300,88,805,817,767,319,1576,2161,1253,1448,1473,1802,913,1771,175,396,1818,2170,497,2256,418,2295,1141,238,18,20,1495,268,2548,2149,1899,1618,1722,504,614,1904,2449,1153,471,627,2606,86,60,1381,391,1462,1715,2447,2538,638,2301,2575,349,426,36,1206,2883,2290,358,978,2446,924,45,2558,2095,1457,1322,1616,148,853,2890,940,2662,1781,2509,2058,281,128,2203,1861,1285,560,210,2196,2860,1301,2722,1841,2453,642,1530,1201,1701,2795,2658,2788,1517,960,1663,580,778,1426,1786,2073,2242,373,369,2436,1840,2304,2051,2062,2785,17,2616,2331,2234,2362,2863,512,2632,1889,77,775,366,1971,2036,1058,1908,2163,1654,1475,434,2250,2540,1062,2874,698,96,2857,1823,1801,1551,2804,1316,1873,2756,885,1579,263,1006,1477,2879,2845,1563,2370,1025,2798,72,
2836,1671,2009,2856,652,2088,2748,1709,76,1192,847,2420,902,1918,1693,507,2271,181,571,124,984,1340,2286,1420,1585,8,372,2197,600,2774,2374,937,1178,2780,2582,2633,1171,910,2441,798,279,2835,2760,2654,1007,773,1266,1727,726,1766,2490,1298,1258,2257,1346,1874,613,608,606,1184,678,943,2859,2429,190,763,2128,872,1396,378,1233,2551,1653,26,2637,1534,1023,757,1856,2001,1737,1089,2146,2757,294,188,2640,2296,634,911,2479,2727,273,2478,2241,2380,1342,1948,921,232,1811,899,1183,1267,1387,1378,895,2505,1832,258,2311,1367,2179,361,1303,484,550,1392,881,609,762,1434,1166,130,641,860,1547,1685,1625,1591,1415,1449,990,1358,2145,1535,2316,2144,38,2430,1644,2714,2531,2877,2209,1812,2751,1045,1274,681,1822,1639,2274,1695,1860,1875,1980,1224,898,1969,296,1773,1026,229,953,783,2500,1507,2870,2651,2496,2580,2611,2407,655,1629,997,2085,248,852,2408,1064,1397,178,167,1504,1944,756,1508,1090,1051,1844,2106,2702,1122,1411,2309,1539,1692,227,1351,2336,2754,244,669,1960,684,2423,2684,101,2826,1388,134,970,2097,383,2489,459,1327,202,278,495,1627,341,1543,1188,2590,1323,718,41,409,980,2056,1094,2687,2014,2680,1974,2284,1395,213,2521,2201,2050,877,1140,2365,1118,1333,1975,1684,1134,328,1360,2204,1263,800,2212,1223,407,927,1751,802,2237,765,50,2522,1082,2619,133,951,422,2635,1648,596,1783,1655,2190,374,1194,2253,2530,1302,1805,2465,2289,2118,2681,1761,2191,2458,2134,451,454,2002,2354,752,2573,2422,464,1925,14,1721,2078,1494,2405,2869,1338,215,195,226,318,1128,2892,228,1282,2010,967,1304,403,992,1973,815,799,637,758,2838,2108,1029,1386,599,2605,2116,2159,1731,784,1689,2487,2330,1961,1942,1714,918,387,1970,1125,292,1049,1592,1399,127,1262,754,2164,1597,1945,2634,252,2604,827,1020,1383,251,1509,9,2011,2822,651,1359,1229,470,1854,1843,2101,2649,1699,1142,1437,869,804,2768,2746,2147,1774,2141,174,1424,2459,499,1100,1518,2844,2306,2378,672,682,1826,1682,683,2694,2544,1602,400,2091,2071,2364,1149,764,1065,334,2020,2770,1470,1200,1605,890,2067,1538,1245,1762,1235,33,23,1221,1979,1659,1624,3,662,2210,149
8,271,1743,312,1888,2019,2107,936,2564,1982,557,158,750,2012,2291,121,1167,1177,1967,1884,64,2467,973,2236,2393,285,843,1735,1728,569,1185,629,508,640,2136,1435,714,1463,1516,663,2026,2324,1257,2127,116,1151,27,2435,1366,2855,1914,1620,257,1777,1047,2168,1581,751,534,1248,1938,1503,71,2356,729,1127,311,1706,1740,933,1923,1318,1793,2160,2434,2310,1027,2000,1307,59,2299,411,1315,1408,1600,1611,1541,31,841,2156,1934,2100,630,661,556,1937,476,647,29,2375,2759,894,161,1817,2523,1879,1101,963,322,313,2499,1078,2473,2169,760,1129,519,2182,337,1529,1484,2585,2546,2474,371,1269,2773,1578,2667,2516,1814,575,1776,1057,2532,1767,2630,2484,1662,2445,1371,1074,80,2526,1048,65,865,35,352,1056,2614,1349,1936,270,998,1341,90,2171,1197,339,1114,1705,2358,253,1173,1542,1897,2813,424,1497,1747,2477,200,1440,2663,142,1577,1640,1990,1456,250,1019,1617,2728,2379,1892,1240,780,1819,308,87,287,2243,455,2139,1215,1138,2172,691,2779,2153,607,2105,2046,2765,1038,2553,748,901,2406,746,1764,2042,938,1331,2007,1256,2277,1384,61,747,1236,97,2024,48,803,448,392,2771,979,478,776,932,386,1921,2313,2596,1260,180,1868,1734,1645,2389,996,1355,1976,24,303,146,1329,1073,2510,201,703,2848,934,2715,1570,2371,2231,576,1162,810,712,2471,1601,781,1882,2778,719,2254,2659,22,255,2853,1261,1838,515,2882,731,1910,2337,2617,136,590,2061,2685,2372,2392,1241,81,962,2805,1010,2597,427,884,1402,246,1615,1059,1829,1195,1198,2421,736,78,144,2403,1317,494,1901,2708,187,1433,1903,2155,1758,1264,774,73,665,2492,1428,1374,164,2735,438,237,1368,143,595,579,794,2563,2686,389,1574,1210,2782,1146,474,2545,650,2753,2565,437,2173,2636,1896,1700,89,2729,402,241,597,1704,481,1951,2491,1174,277,1447,737,333,1775,2098,2122,1806,845,1931,2221,2755,916,2738,561,1864,284,1956,2093,1559,1707,674,914,830,2723,2275,516,867,2725,16,2562,2176,2741,1111,1939,1343,385,1913,2345,2645,2884,500,1493,10,2404,1385,301,2099,1657,2566,260,995,490,1115,844,2278,314,900,2357,2273,749,1560,2692,340,2076,2258,2466,1546,582,1211,904,170,1421,1741,2828,732,
2013,1742,37,185,624,1446,2450,203,2468,644,2823,112,2063,1612,2419,1514,1962,1412,632,1476,1839,974,544,2084,184,113,126,2238,1779,2572,1442,832,2225,1594,2470,198,2656,168,1520,2730,1950,1519,1954,222,551,793,191,57,336,214,2638,1295,2469,1193,942,2341,2625,2799,1372,115,2834,1698,1599,1746,2592,717,1572,1161,1634,1941,1986,1522,1646,823,2864,1244,2849,889,2125,317,813,221,2535,1820,1401,917,2355,1621,1067,593,412,2075,2077,2157,105,1451,157,104,458,625,1403,2589,1083,868,1902,1963,1034,1568,2624,1895,2816,668,1376,568,1667,837,1589,395,2431,1675,617,2208,1213,2104,594,442,1313,1309,789,570,2400,1878,1719,2072,323,1694,792,2432,1647,1633,416,205,1275,1160,2758,1929,2216,1391,564,2821,1001,47,724,989,276,2048,1536,207,1460,521,991,604,1964,377,2353,155,1410,1687,466,1040,912,931,1466,492,2594,1419,1148,1168,1830,1717,801,460,131,2664,1390,1630,293,93,2092,1632,1182,558,1150,1808,944,1280,1046,2052,1816,1158,2387,338,1176,1075,1297,1098,2721,79,603,344,541,1749,834,741,1320,664,2174,2615,1017,2040,2360,2762,968,423,1940,1556,326,2246,2135,831,1225,1289,2034,208,2133,1186,2198,987,2293,7,2187,675,1319,1405,1076,536,2648,695,2578,150,2669,1407,1373,2683,2415,1155,1565,368,796,2279,2886,791,236,316,1555,2247,612,269,2368,1850,1218,1356,1157,994,166,1922,1175,393,1014,2068,2004,1109,1690,2178,1243,304,498,496,935,2206,2515,197,1792,1532,298,156,702,1413,1987,727,1136,1488,2514,173,821,432,1181,2891,1135,1726,159,2367,857,1898,2451,2528,2426,1126,2571,1050,2513,2023,1004,2868,1992,2646,2661,42,2103,2752,559,1799,2801,488,2288,573,487,517,129,1271,692,2322,930,744,906,1496,2739,952,399,2121,2183,1603,1858,1231,2750,2220,2671,977,100,482,217,139,2550,1222,300,533,1003,2697,850,812,846,1759,165,2338,658,2717,1842,2688,2123,1227,354,1573,1011,1400,1163,2508,2111,2802,2820,975,1232,2666,441,401,2043,1252,182,2185,1443,2326,1660,2701,2673,1691,1024,1326,2539,2481,2885,1791,1511,1569,2438,461,1370,44,2537,618,879,670,151,876,1821,622,2112,2055,1375,639,2047,743,2386,1112,1785,5
52,2800,969,1389,1788,1915,199,145,1549,1214,583,1794,1580,2343,709,25,2524,370,2142,2789,62,833,2839,444,6,1718,620,993,1643,2167,1450,431,1713,1778,1487,398,1234,475,1054,1259,2533,2259,262,549,939,445,2060,1890,2032,242,2244,1953,1952,2417,2557,2114,2475,2482,2120,486,1815,753,2329,2207,2674,1521,700,1803,2154,1191,1230,493,1637,1753,1827,1147,2199,893,2498,1080,1330,1527,2806,986,1350,2396,1880,1501,1957,2027,1871,2691,659,12,861,1876,447,2486,1658,1752,2427,452,1989,1246,2119,367,331,2202,2814,1480,420,2272,1575,283,429,2584,2693,2138,2035,350,1583,808,1754,1467,1199,842,2195,851,40,1425,2298,1782,2188,1848,787,1862,547,1068,363,2704,636,701,645,1486,2424,419,711,345,2280,2143,98,1219,814,92,1833,1406,771,2410,1324,1120,1966,1997,849,854,1852,84,1055,2627,1984,122,2781,2767,1834,430,351,2130,1928,589,1353,469,1030,2827,1796,2462,948,2025,2763,2394,2444,2294,2622,820,1037,786,1124,2224,1379,193,907,2383,1772,2733,513,1994,892,707,1996,2871,2433,2881,449,2411,2109,2703,1920,2276,2041,1203,1032,2480,643,2150,1729,1452,2731,53,616,1789,1686,69,1081,1306,2265,91,176,2628,119,2707,511,2320,1362,1867,477,2840,1972,1121,883,376,925,653,928,1291,2057,2350,2777,70,2609,364,523,2166,999,1607,2519,1998,562,2228,839,1063,1091,2318,240,1619,1286,1863,295,2314,1877,335,2115,826,234,2181,2340,1736,297,723,529,1666,1239,518,235,909,1471,2303,828,1552,247,964,1441,2454,2442,1702,2842,1669,1061,2066,2377,697,945,1926,2131,2385,1893,2158,886,2793,1614,1422,565,2037,1744,2846,2083,2534,388,254,2593,137,1784,1676,822,2388,231,2269,1912,2825,1156,149,1332,2536,1087,1679,1760,2559,2698,109,2342,1276,15,2391,704,2255,903,1983,394,1489,2140,882,206,2830,2460,223,806,1072,2347,1649,1208,807,153,2328,2102,289,838,510,272,1787,1584,1299,2270,2065,976,816,2861,2887,2359,971,261,666,1458,1485,2455,2344,584,1491,1965,2018,1604,2283,1613,1380,1251,2560,425,1991,706,2525,1039,1924,230,1571,325,2520,2743,2650,648,1021,610,1334,1455,179,2483,983,154,2233,1955,1586,949,1917,1041,1108,2764,1703,118
0,1423,1887,687,1610,2726,485,2620,1292,2831,1105,1212,233,1249,1985,2064,537,2230,1544,315,147,1831,1110,772,586,1724,428,1907,1900,1328,1445,11,1336,1661,302,1430,443,1414,2850,1416,94,2305,1255,192,2716,21,321,577,715,2323,2397,2608,1404,2081,646,489,696,2517,1308,2712,1165,58,1677,2747,855,878,929,1851,1104,1099,2732,848,2709,2110,1505,1635,2587,375,1190,1296,835,30,275,2263,2464,919,1825,2413,688,2865,2600,506,467,592,2626,397,1932,1107,1949,631,863,740,2808,501,1398,1968,2792,1172,2494,1531,2193,34,267,2488,327,2248,1680,1540,1566,2529,2668,1454,415,1337,74,1226,1673,1891,1849,103,588,2074,1106,224,2678,905,2069,1916,46,343,2700,2713,572,2495,1548,1853,1472,2213,1102,114,1502,1537,2186,1287,1321,1347,456,2031,1086,1697,686,923,2461,1492,2262,2363,51,2463,567,421,290,1733,1009,689,1354,2876,818,1077,555,2720,2409,2737,2612,965,1708,829,1170,2880,102,249,542,1561,1720,733,2579,656,39,574,1002,769,2348,2327,2033,1905,66,1804,138,480,530,1528,1606,957,212,1866,1723,1220,2791,211,1352,1242,1438,330,2734,1609,2851,509,721,2665,676,2852,742,1015,985,677,194,2315,2570,1130,1824,809,2631,722,1270,1012,1768,1533,1145,950,2308,1590,2485,1033,591,110,1943,2503,896,2642,1638,218,824,56,2742,1909,920,2878,2811,566,2682,2302,2319,779,1345,585,2542,2416,1116,708,479,1837,768,1562,2761,457,615,1857,2577,2229,440,526,502,1738,1478,856,2376,2705,353,2384,1436,2129,2873,1626,2418,941,2215,2655,1999,864,342,1281,2888,2217,1461,673,2740,169,1587,545,538,220,1189,2476,972,825,1756,710,690,1499,2552,1479,871,1748,1836,5,2,13,554,1278,946,2428,2349,1554,1512,75,1465,2373,716,1464,2501,1335,108,540,870,2082,410,2621,1513,1595,1524,2766,1482,1088,2260,2148,2165,1143,777,2317,468,1312,1137,1757,1711,68,365,1859,2556,2227,324,320,408,795,307,2643,1036,2677,346,520,1523,514,1995,390,623,2214,1060,1807,535,1739,605,2049,2261,628,2854,1097,954,1885,2219,1409,1096,621,2266,626,2607,2872,1069,1250,2843,1553,1828,381,2554,1217,524,1311,2339,2022,216,649,1636,2089,1946,505,581,2610,2090,1427,278
6,2402,1664,2448,915,553,404,2333,782,1159,2675,243,329,310,1770,1696,2015,1453,2599,819,439,602,1237,265,2660,1022,1515,111,527,1564,2623,2736,730,2016,1855,2772,256,462,2086,2583,99,2699,2807,1598,1797,2866,2862,2028,172,2472,2184,2847,705,797,1672,135,1927,2401,1745,1154,785,840,239,189,875,2569,54,286,1978,2307,1071,1790,531,1683,563,2124,2803,380,132,1052,2162,1152,660,43,2567,2152,2218,2457,2456,2039,1131,2711,2045,2690,1043,2745,598,2252,357,356,433,1835,2287,19,2282,1277,2647,2189,1305,1525,2670,2006,1314,2038,2381,1008,1339,473,2549,1652,2858,926,2437,1364,2695,63,2414,309,2094,859,728,2443,1881,2595,1284,1028,1668,1872,2676,2512,1631,1348,2502,186,1254,2601,1958,1439,525,1272,2226,2810,1016,413,2029,2796,1357,1510,2784,82,1847,2399,766,2639,866,1394,1869,761,1810,1765,1763,472,654,463,414,1558,2267,1474,2126,546,1650,1382,1526,49,2223,2249,1085,2568,693,2598,2240,2547,2832,32,2222,1238,382,1469,141,1582,1273,2775,1113,2177,745,1164,1247,955,2382,1179,1432,2833,2776,125,2292,1665,1429,611,1886,2211,887,1169,2812,725,1725,1588,2817,1018,657,713,1506,1283,2440,2268,2719,578,2232,947,2603,908,503,1712,1119,790,1845,1361,2815,2794,264,2769,67,811,85,2369,405,2096,1000,982,1204,2783,1993,225,619,1798,2087,2044,465,55,1846,2652,107,2574,2285,1628,873,680,183,1593,788,2841,1444,2395,2390,1557,1093,2543,2576,1103,2003,1490,739,2504,2264,880,384,522 | |
subgraph array index 0 edges count 9076 | |
subgraph array index 0 vertices count 2892 | |
vertex id 298 rank 16.08652645559089 | |
vertex id 389 rank 12.098406805535728 | |
vertex id 304 rank 12.087497056846983 | |
vertex id 736 rank 11.754874286531196 | |
vertex id 93 rank 9.987520674262788 | |
vertex id 359 rank 9.612964315366263 | |
vertex id 594 rank 8.581300969025714 | |
vertex id 205 rank 8.329897239219575 | |
vertex id 156 rank 8.284141266051707 | |
vertex id 561 rank 7.793492685726111 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.graphx._ | |
import org.apache.spark._ | |
import org.apache.spark.rdd.RDD | |
import org.apache.spark.SparkContext | |
/**
 * Loads an edge list into a GraphX graph, groups its vertices by strongly
 * connected component, and prints the ten highest PageRank vertices of the
 * subgraph induced by each component.
 */
object BristolAdjacency {

  /** Space-separated edge list, resolved relative to the working directory. */
  private val EdgeListPath = "BristolAdjacencySpaced.txt"

  /** Convergence tolerance passed to `pageRank`. */
  private val PageRankTolerance = 0.0001

  /** Iteration count passed to `stronglyConnectedComponents`. */
  private val SccIterations = 5

  /** Number of top-ranked vertices printed per subgraph. */
  private val TopN = 10

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(false)
      .setMaster("local[1]") // TODO: multiple cores cause confusion in the orderings
      .setAppName("GraphX Example")
    val sc = new SparkContext(conf)

    // Stop the context even when loading or analysis throws.
    try {
      sc.setLogLevel("ERROR")

      // Loads the space-separated edge data file.
      val graph: Graph[Int, Int] = GraphLoader.edgeListFile(sc, EdgeListPath)
      val edgeCount: Long = graph.numEdges
      val vertexCount: Long = graph.numVertices
      println(s"edges count $edgeCount")
      println(s"vertices count $vertexCount")

      // (vertexId, componentId) pairs are swapped so vertices can be grouped per component.
      val groupedComponents: Array[(VertexId, Iterable[VertexId])] =
        graph.stronglyConnectedComponents(SccIterations).vertices.map(_.swap).groupByKey.collect

      val subgraphs: Array[Graph[Int, Int]] = groupedComponents.map { case (subGraphId, vrtcs) =>
        println(s"subGraphId $subGraphId vrtcs ${vrtcs.mkString(",")}")
        // Build the membership set once instead of once per vertex tested by the predicate.
        val members: Set[VertexId] = vrtcs.toSet
        graph.subgraph(vpred = (id, _) => members.contains(id))
      }

      subgraphs.zipWithIndex.foreach { case (subgraph, i) => printTopRanks(subgraph, i) }

      // Blocks until input arrives on stdin before shutting down.
      // NOTE(review): presumably keeps the application alive for inspection — confirm.
      System.in.read()
    } finally {
      sc.stop()
    }
  }

  /** Prints the edge count, vertex count and the `TopN` highest PageRank vertices of `subgraph`. */
  private def printTopRanks(subgraph: Graph[Int, Int], index: Int): Unit = {
    println(s"subgraph array index $index edges count ${subgraph.numEdges}")
    println(s"subgraph array index $index vertices count ${subgraph.numVertices}")
    val ranks = subgraph.pageRank(PageRankTolerance).vertices.sortBy(_._2, ascending = false)
    ranks.take(TopN).foreach { case (id, rank) => println(s"vertex id $id rank $rank") }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment