Created
January 3, 2020 03:47
-
-
Save viniciusmss/ea258495e7399a6ff28e2a6d5746fe45 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Assignment 1 \n", | |
| "## Answer key\n", | |
| "\n", | |
| "### Data Preprocessing" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 80, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:21.427614Z", | |
| "start_time": "2020-01-03T03:46:15.053Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "### Multilateral Development Institution Data\n", | |
| "foo <- read.csv(\"https://tinyurl.com/yb4phxx8\") # read in the data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 81, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:21.474520Z", | |
| "start_time": "2020-01-03T03:46:15.065Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<ol class=list-inline>\n", | |
| "\t<li>'Number'</li>\n", | |
| "\t<li>'Status'</li>\n", | |
| "\t<li>'Cluster'</li>\n", | |
| "\t<li>'Prefix'</li>\n", | |
| "\t<li>'Type'</li>\n", | |
| "\t<li>'Country'</li>\n", | |
| "\t<li>'Dept'</li>\n", | |
| "\t<li>'Division'</li>\n", | |
| "\t<li>'LTAA'</li>\n", | |
| "\t<li>'Loan'</li>\n", | |
| "\t<li>'ApprovalDate'</li>\n", | |
| "\t<li>'AgreementDate'</li>\n", | |
| "\t<li>'Agreement'</li>\n", | |
| "\t<li>'EffectivityDate'</li>\n", | |
| "\t<li>'CancellationDate'</li>\n", | |
| "\t<li>'FinancialCompletionDate'</li>\n", | |
| "\t<li>'OriginalCompletionDate'</li>\n", | |
| "\t<li>'RevisedCompletionDate'</li>\n", | |
| "\t<li>'JSF'</li>\n", | |
| "\t<li>'Fund'</li>\n", | |
| "\t<li>'RevisedAmount'</li>\n", | |
| "\t<li>'Rating'</li>\n", | |
| "\t<li>'No'</li>\n", | |
| "\t<li>'PCR'</li>\n", | |
| "\t<li>'CirculationDate'</li>\n", | |
| "</ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 'Number'\n", | |
| "\\item 'Status'\n", | |
| "\\item 'Cluster'\n", | |
| "\\item 'Prefix'\n", | |
| "\\item 'Type'\n", | |
| "\\item 'Country'\n", | |
| "\\item 'Dept'\n", | |
| "\\item 'Division'\n", | |
| "\\item 'LTAA'\n", | |
| "\\item 'Loan'\n", | |
| "\\item 'ApprovalDate'\n", | |
| "\\item 'AgreementDate'\n", | |
| "\\item 'Agreement'\n", | |
| "\\item 'EffectivityDate'\n", | |
| "\\item 'CancellationDate'\n", | |
| "\\item 'FinancialCompletionDate'\n", | |
| "\\item 'OriginalCompletionDate'\n", | |
| "\\item 'RevisedCompletionDate'\n", | |
| "\\item 'JSF'\n", | |
| "\\item 'Fund'\n", | |
| "\\item 'RevisedAmount'\n", | |
| "\\item 'Rating'\n", | |
| "\\item 'No'\n", | |
| "\\item 'PCR'\n", | |
| "\\item 'CirculationDate'\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 'Number'\n", | |
| "2. 'Status'\n", | |
| "3. 'Cluster'\n", | |
| "4. 'Prefix'\n", | |
| "5. 'Type'\n", | |
| "6. 'Country'\n", | |
| "7. 'Dept'\n", | |
| "8. 'Division'\n", | |
| "9. 'LTAA'\n", | |
| "10. 'Loan'\n", | |
| "11. 'ApprovalDate'\n", | |
| "12. 'AgreementDate'\n", | |
| "13. 'Agreement'\n", | |
| "14. 'EffectivityDate'\n", | |
| "15. 'CancellationDate'\n", | |
| "16. 'FinancialCompletionDate'\n", | |
| "17. 'OriginalCompletionDate'\n", | |
| "18. 'RevisedCompletionDate'\n", | |
| "19. 'JSF'\n", | |
| "20. 'Fund'\n", | |
| "21. 'RevisedAmount'\n", | |
| "22. 'Rating'\n", | |
| "23. 'No'\n", | |
| "24. 'PCR'\n", | |
| "25. 'CirculationDate'\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " [1] \"Number\" \"Status\" \n", | |
| " [3] \"Cluster\" \"Prefix\" \n", | |
| " [5] \"Type\" \"Country\" \n", | |
| " [7] \"Dept\" \"Division\" \n", | |
| " [9] \"LTAA\" \"Loan\" \n", | |
| "[11] \"ApprovalDate\" \"AgreementDate\" \n", | |
| "[13] \"Agreement\" \"EffectivityDate\" \n", | |
| "[15] \"CancellationDate\" \"FinancialCompletionDate\"\n", | |
| "[17] \"OriginalCompletionDate\" \"RevisedCompletionDate\" \n", | |
| "[19] \"JSF\" \"Fund\" \n", | |
| "[21] \"RevisedAmount\" \"Rating\" \n", | |
| "[23] \"No\" \"PCR\" \n", | |
| "[25] \"CirculationDate\" " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<ol class=list-inline>\n", | |
| "\t<li>9016</li>\n", | |
| "\t<li>25</li>\n", | |
| "</ol>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{enumerate*}\n", | |
| "\\item 9016\n", | |
| "\\item 25\n", | |
| "\\end{enumerate*}\n" | |
| ], | |
| "text/markdown": [ | |
| "1. 9016\n", | |
| "2. 25\n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| "[1] 9016 25" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| "<thead><tr><th scope=col>Number</th><th scope=col>Status</th><th scope=col>Cluster</th><th scope=col>Prefix</th><th scope=col>Type</th><th scope=col>Country</th><th scope=col>Dept</th><th scope=col>Division</th><th scope=col>LTAA</th><th scope=col>Loan</th><th scope=col>...</th><th scope=col>FinancialCompletionDate</th><th scope=col>OriginalCompletionDate</th><th scope=col>RevisedCompletionDate</th><th scope=col>JSF</th><th scope=col>Fund</th><th scope=col>RevisedAmount</th><th scope=col>Rating</th><th scope=col>No</th><th scope=col>PCR</th><th scope=col>CirculationDate</th></tr></thead>\n", | |
| "<tbody>\n", | |
| "\t<tr><td>5001 </td><td>Financially Completed </td><td> </td><td>R </td><td>RETA </td><td>REG </td><td> </td><td> </td><td>Agriculture, natural resources and rural development</td><td>0 </td><td>... </td><td>1987-12-31 </td><td> </td><td>1987-12-31 </td><td>NA </td><td>2 </td><td>0.155 </td><td>NA </td><td> </td><td> </td><td> </td></tr>\n", | |
| "\t<tr><td> 1 </td><td>Financially Completed </td><td> </td><td> </td><td>ADTA </td><td>INO </td><td> </td><td> </td><td>Agriculture, natural resources and rural development</td><td>0 </td><td>... </td><td>1968-07-31 </td><td> </td><td>1968-07-31 </td><td>NA </td><td>2 </td><td>0.080 </td><td>NA </td><td> </td><td> </td><td> </td></tr>\n", | |
| "\t<tr><td> 2 </td><td>Financially Completed </td><td> </td><td> </td><td>ADTA </td><td>KOR </td><td> </td><td> </td><td>Agriculture, natural resources and rural development</td><td>0 </td><td>... </td><td>1969-10-31 </td><td> </td><td>1969-10-31 </td><td>NA </td><td>2 </td><td>0.067 </td><td>NA </td><td> </td><td> </td><td> </td></tr>\n", | |
| "\t<tr><td> 3 </td><td>Financially Completed </td><td> </td><td> </td><td>ADTA </td><td>PHI </td><td> </td><td> </td><td>Agriculture, natural resources and rural development</td><td>0 </td><td>... </td><td>1970-01-31 </td><td> </td><td>1970-01-31 </td><td>NA </td><td>2 </td><td>0.105 </td><td>NA </td><td> </td><td> </td><td> </td></tr>\n", | |
| "\t<tr><td> 4 </td><td>Financially Completed </td><td> </td><td> </td><td>ADTA </td><td>VIE </td><td> </td><td> </td><td>Finance </td><td>0 </td><td>... </td><td>1972-08-31 </td><td> </td><td>1972-08-31 </td><td>NA </td><td>2 </td><td>0.115 </td><td>NA </td><td> </td><td> </td><td> </td></tr>\n", | |
| "\t<tr><td> 5 </td><td>Financially Completed </td><td> </td><td> </td><td>PPTA </td><td>PHI </td><td> </td><td> </td><td>Transport </td><td>0 </td><td>... </td><td>1970-03-31 </td><td> </td><td>1970-03-31 </td><td>NA </td><td>2 </td><td>0.325 </td><td>NA </td><td> </td><td> </td><td> </td></tr>\n", | |
| "</tbody>\n", | |
| "</table>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{tabular}{r|lllllllllllllllllllllllll}\n", | |
| " Number & Status & Cluster & Prefix & Type & Country & Dept & Division & LTAA & Loan & ... & FinancialCompletionDate & OriginalCompletionDate & RevisedCompletionDate & JSF & Fund & RevisedAmount & Rating & No & PCR & CirculationDate\\\\\n", | |
| "\\hline\n", | |
| "\t 5001 & Financially Completed & & R & RETA & REG & & & Agriculture, natural resources and rural development & 0 & ... & 1987-12-31 & & 1987-12-31 & NA & 2 & 0.155 & NA & & & \\\\\n", | |
| "\t 1 & Financially Completed & & & ADTA & INO & & & Agriculture, natural resources and rural development & 0 & ... & 1968-07-31 & & 1968-07-31 & NA & 2 & 0.080 & NA & & & \\\\\n", | |
| "\t 2 & Financially Completed & & & ADTA & KOR & & & Agriculture, natural resources and rural development & 0 & ... & 1969-10-31 & & 1969-10-31 & NA & 2 & 0.067 & NA & & & \\\\\n", | |
| "\t 3 & Financially Completed & & & ADTA & PHI & & & Agriculture, natural resources and rural development & 0 & ... & 1970-01-31 & & 1970-01-31 & NA & 2 & 0.105 & NA & & & \\\\\n", | |
| "\t 4 & Financially Completed & & & ADTA & VIE & & & Finance & 0 & ... & 1972-08-31 & & 1972-08-31 & NA & 2 & 0.115 & NA & & & \\\\\n", | |
| "\t 5 & Financially Completed & & & PPTA & PHI & & & Transport & 0 & ... & 1970-03-31 & & 1970-03-31 & NA & 2 & 0.325 & NA & & & \\\\\n", | |
| "\\end{tabular}\n" | |
| ], | |
| "text/markdown": [ | |
| "\n", | |
| "Number | Status | Cluster | Prefix | Type | Country | Dept | Division | LTAA | Loan | ... | FinancialCompletionDate | OriginalCompletionDate | RevisedCompletionDate | JSF | Fund | RevisedAmount | Rating | No | PCR | CirculationDate | \n", | |
| "|---|---|---|---|---|---|\n", | |
| "| 5001 | Financially Completed | | R | RETA | REG | | | Agriculture, natural resources and rural development | 0 | ... | 1987-12-31 | | 1987-12-31 | NA | 2 | 0.155 | NA | | | | \n", | |
| "| 1 | Financially Completed | | | ADTA | INO | | | Agriculture, natural resources and rural development | 0 | ... | 1968-07-31 | | 1968-07-31 | NA | 2 | 0.080 | NA | | | | \n", | |
| "| 2 | Financially Completed | | | ADTA | KOR | | | Agriculture, natural resources and rural development | 0 | ... | 1969-10-31 | | 1969-10-31 | NA | 2 | 0.067 | NA | | | | \n", | |
| "| 3 | Financially Completed | | | ADTA | PHI | | | Agriculture, natural resources and rural development | 0 | ... | 1970-01-31 | | 1970-01-31 | NA | 2 | 0.105 | NA | | | | \n", | |
| "| 4 | Financially Completed | | | ADTA | VIE | | | Finance | 0 | ... | 1972-08-31 | | 1972-08-31 | NA | 2 | 0.115 | NA | | | | \n", | |
| "| 5 | Financially Completed | | | PPTA | PHI | | | Transport | 0 | ... | 1970-03-31 | | 1970-03-31 | NA | 2 | 0.325 | NA | | | | \n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " Number Status Cluster Prefix Type Country Dept Division\n", | |
| "1 5001 Financially Completed R RETA REG \n", | |
| "2 1 Financially Completed ADTA INO \n", | |
| "3 2 Financially Completed ADTA KOR \n", | |
| "4 3 Financially Completed ADTA PHI \n", | |
| "5 4 Financially Completed ADTA VIE \n", | |
| "6 5 Financially Completed PPTA PHI \n", | |
| " LTAA Loan ...\n", | |
| "1 Agriculture, natural resources and rural development 0 ...\n", | |
| "2 Agriculture, natural resources and rural development 0 ...\n", | |
| "3 Agriculture, natural resources and rural development 0 ...\n", | |
| "4 Agriculture, natural resources and rural development 0 ...\n", | |
| "5 Finance 0 ...\n", | |
| "6 Transport 0 ...\n", | |
| " FinancialCompletionDate OriginalCompletionDate RevisedCompletionDate JSF Fund\n", | |
| "1 1987-12-31 1987-12-31 NA 2 \n", | |
| "2 1968-07-31 1968-07-31 NA 2 \n", | |
| "3 1969-10-31 1969-10-31 NA 2 \n", | |
| "4 1970-01-31 1970-01-31 NA 2 \n", | |
| "5 1972-08-31 1972-08-31 NA 2 \n", | |
| "6 1970-03-31 1970-03-31 NA 2 \n", | |
| " RevisedAmount Rating No PCR CirculationDate\n", | |
| "1 0.155 NA \n", | |
| "2 0.080 NA \n", | |
| "3 0.067 NA \n", | |
| "4 0.105 NA \n", | |
| "5 0.115 NA \n", | |
| "6 0.325 NA " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# column names\n", | |
| "names(foo)\n", | |
| "\n", | |
| "# dimensions of the data set\n", | |
| "dim(foo)\n", | |
| "\n", | |
| "# quick look at the data structure\n", | |
| "head(foo)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 82, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:21.890377Z", | |
| "start_time": "2020-01-03T03:46:15.074Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# one thing to be very careful with (in this data set) is the use of dates. 8 columns involve dates.\n", | |
| "\n", | |
| "# take note of the columns representing calendar dates\n", | |
| "date.columns <- c(11, 12, 14, 15, 16, 17, 18, 25)\n", | |
| "\n", | |
| "\n", | |
| "# these columns need some tweaking--I want to address missing values, calling the blank (empty) \n", | |
| "# elements \"NA\" instead of leaving them blank, and I wish to tell R these are \"Date\" objects.\n", | |
| "\n", | |
| "for(i in date.columns) # this \"for loop\" only loops through the \"date.columns\" -- no other columns.\n", | |
| " \n", | |
| "{\n", | |
| " \n", | |
| " # identify which values are missing in the \"i\"th column of the foo data set\n", | |
| " which_values_are_missing <- which(as.character(foo[, i]) == \"\")\n", | |
| " \n", | |
| " # those values that are missing (blank) in the \"i\"th column are replaced by <NA>\n", | |
| " # because R knows how to handle \"NA\" -- NA means something special in R--blanks are handled \n", | |
| " # more unpredictably (which is bad).\n", | |
| " foo[which_values_are_missing, i] <- NA\n", | |
| " \n", | |
| " # last step--convert each of these columns (each of which is structured as a column of \"factor\" values)\n", | |
| " # into a column of dates--i.e., convert it to an object of \"class\" = Date. They are dates, after all.\n", | |
| " # And if you convert them to the Date class, R will know they are dates and you can manipulate \n", | |
| " # dates in a simple, straightforward way. Otherwise, you won't be able to easily manipulate them\n", | |
| " # arithmetically. E.g., for simple Date operations, see the next code cell below...\n", | |
| " # **By the way, if you don't understand what a \"factor\" is in R, you should Google it.** \n", | |
| " foo[, i] <- as.Date(as.character(foo[, i]))\n", | |
| " \n", | |
| "}\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 83, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:21.936256Z", | |
| "start_time": "2020-01-03T03:46:15.081Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<time datetime=\"1968-03-13\">1968-03-13</time>" | |
| ], | |
| "text/latex": [ | |
| "1968-03-13" | |
| ], | |
| "text/markdown": [ | |
| "1968-03-13" | |
| ], | |
| "text/plain": [ | |
| "[1] \"1968-03-13\"" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<time datetime=\"1968-07-03\">1968-07-03</time>" | |
| ], | |
| "text/latex": [ | |
| "1968-07-03" | |
| ], | |
| "text/markdown": [ | |
| "1968-07-03" | |
| ], | |
| "text/plain": [ | |
| "[1] \"1968-07-03\"" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Time difference of -112 days" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Now R knows that these columns are comprised of dates\n", | |
| "# for example... Replicate this yourself...\n", | |
| "\n", | |
| "foo[3,12]\n", | |
| "# [1] \"1968-03-13\"\n", | |
| "\n", | |
| "foo[4,12]\n", | |
| "# [1] \"1968-07-03\"\n", | |
| "\n", | |
| "foo[3,12] - foo[4,12]\n", | |
| "# Time difference of -112 days" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 84, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:21.967171Z", | |
| "start_time": "2020-01-03T03:46:15.083Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Also, one additional helpful hint... How to eliminate rows with NAs...\n", | |
| "# The \"is.na\" function--for more info, Google it or type ?is.na at the R command prompt in the console.\n", | |
| "which.have.NAs <- which(is.na(foo$Rating)) # for which rows is the claim \"is.na\" a TRUE claim?\n", | |
| "\n", | |
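| "# Then, if you wanted to, e.g., remove all those rows, retaining only the rows with ratings...\n", | |
| "# (Caution: negative indexing with which() is only safe when at least one NA was found. If\n", | |
| "# which.have.NAs were empty, foo[-which.have.NAs, ] would return ZERO rows; the logical form\n", | |
| "# foo[!is.na(foo$Rating), ] works in both cases.)\n", | |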
| "new_foo <- foo[-which.have.NAs, ]\n", | |
| "# Notice I called this tweaked data set \"new_foo\" instead of rewriting over the original data set...\n", | |
| "# It's a bit safer to do this, in case I decide I want to quickly revert back to the original data set.\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Question 1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 85, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.008094Z", | |
| "start_time": "2020-01-03T03:46:15.091Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<dl class=dl-horizontal>\n", | |
| "\t<dt>Min.</dt>\n", | |
| "\t\t<dd><time datetime=\"2007-01-31\">2007-01-31</time></dd>\n", | |
| "\t<dt>1st Qu.</dt>\n", | |
| "\t\t<dd><time datetime=\"2009-11-27\">2009-11-27</time></dd>\n", | |
| "\t<dt>Median</dt>\n", | |
| "\t\t<dd><time datetime=\"2012-11-16\">2012-11-16</time></dd>\n", | |
| "\t<dt>Mean</dt>\n", | |
| "\t\t<dd><time datetime=\"2012-12-06\">2012-12-06</time></dd>\n", | |
| "\t<dt>3rd Qu.</dt>\n", | |
| "\t\t<dd><time datetime=\"2016-01-13\">2016-01-13</time></dd>\n", | |
| "\t<dt>Max.</dt>\n", | |
| "\t\t<dd><time datetime=\"2018-06-29\">2018-06-29</time></dd>\n", | |
| "</dl>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{description*}\n", | |
| "\\item[Min.] 2007-01-31\n", | |
| "\\item[1st Qu.] 2009-11-27\n", | |
| "\\item[Median] 2012-11-16\n", | |
| "\\item[Mean] 2012-12-06\n", | |
| "\\item[3rd Qu.] 2016-01-13\n", | |
| "\\item[Max.] 2018-06-29\n", | |
| "\\end{description*}\n" | |
| ], | |
| "text/markdown": [ | |
| "Min.\n", | |
| ": 2007-01-311st Qu.\n", | |
| ": 2009-11-27Median\n", | |
| ": 2012-11-16Mean\n", | |
| ": 2012-12-063rd Qu.\n", | |
| ": 2016-01-13Max.\n", | |
| ": 2018-06-29\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " Min. 1st Qu. Median Mean 3rd Qu. Max. \n", | |
| "\"2007-01-31\" \"2009-11-27\" \"2012-11-16\" \"2012-12-06\" \"2016-01-13\" \"2018-06-29\" " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "### ASSIGNMENT 1 -- You may want to read ALL the questions before you begin. \n", | |
| "### NOTE: FOR ALL QUESTIONS BELOW, ONLY CONSIDER PROJECTS WITH \n", | |
| "### non-missing \"CirculationDate\" >= 2007-01-01. \n", | |
| "### EXCLUDE ALL OTHER PROJECTS FROM YOUR ANALYSIS.\n", | |
| "### YOU MUST provide a link to your R code. ------ DON'T FORGET TO DO THIS!!!!!!!!!!!!\n", | |
| "# Take note of the column names: i.e., you can type: names(foo)\n", | |
| "# fyi: the column called \"Rating\" is the success rating at completion. 0 = lowest, 3 = highest.\n", | |
| "\n", | |
| "\n", | |
| "noCircDateNA_foo <- foo[!is.na(foo$CirculationDate), ]\n", | |
| "df <- noCircDateNA_foo[which(noCircDateNA_foo$CirculationDate >= as.Date(\"2007-01-01\")), ]\n", | |
| "\n", | |
| "# Check:\n", | |
| "summary(df$CirculationDate)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 86, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.055966Z", | |
| "start_time": "2020-01-03T03:46:15.098Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "-0.106350446050535" | |
| ], | |
| "text/latex": [ | |
| "-0.106350446050535" | |
| ], | |
| "text/markdown": [ | |
| "-0.106350446050535" | |
| ], | |
| "text/plain": [ | |
| "[1] -0.1063504" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| "Call:\n", | |
| "lm(formula = Rating ~ Delay, data = df_q1)\n", | |
| "\n", | |
| "Residuals:\n", | |
| " Min 1Q Median 3Q Max \n", | |
| "-2.00835 0.00467 0.05330 0.12812 1.37040 \n", | |
| "\n", | |
| "Coefficients:\n", | |
| " Estimate Std. Error t value Pr(>|t|) \n", | |
| "(Intercept) 2.008e+00 2.244e-02 89.484 < 2e-16 ***\n", | |
| "Delay -1.447e-04 3.036e-05 -4.766 2.01e-06 ***\n", | |
| "---\n", | |
| "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n", | |
| "\n", | |
| "Residual standard error: 0.6279 on 1986 degrees of freedom\n", | |
| "Multiple R-squared: 0.01131,\tAdjusted R-squared: 0.01081 \n", | |
| "F-statistic: 22.72 on 1 and 1986 DF, p-value: 2.011e-06\n" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# (a) Is this claim true? Explain. \n", | |
| "\n", | |
| "# Some projects are missing an original completion date or a rating.\n", | |
| "# We need to drop those rows before computing delays.\n", | |
| "compDateMask <- which(is.na(df$OriginalCompletionDate))\n", | |
| "df_withDates <- df[-compDateMask,]\n", | |
| "df_q1 <- df_withDates[-which(is.na(df_withDates$Rating)),]\n", | |
| "\n", | |
| "# Create a delay column\n", | |
| "df_q1$Delay <- df_q1$RevisedCompletionDate - df_q1$OriginalCompletionDate \n", | |
| "\n", | |
| "# The simplest way to assess the relationship between delays and ratings is to compute the correlation\n", | |
| "cor(as.numeric(df_q1$Delay), df_q1$Rating)\n", | |
| "\n", | |
| "# You could also use a simple linear regression model.\n", | |
| "summary(lm(Rating ~ Delay, data=df_q1))\n", | |
| "\n", | |
| "# The claim is somewhat true. \n", | |
| "# Delays are negatively correlated with ratings, but the association is very weak\n", | |
| "# (correlation of about -0.11; R-squared of about 0.011 in the regression above). " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 87, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.189149Z", | |
| "start_time": "2020-01-03T03:46:15.101Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "0" | |
| ], | |
| "text/latex": [ | |
| "0" | |
| ], | |
| "text/markdown": [ | |
| "0" | |
| ], | |
| "text/plain": [ | |
| "[1] 0" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| "<thead><tr><th scope=col>CirculationYear</th><th scope=col>mean.rating</th><th scope=col>median.rating</th><th scope=col>IQR.rating</th></tr></thead>\n", | |
| "<tbody>\n", | |
| "\t<tr><td>2007 </td><td>1.985612</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2008 </td><td>1.807882</td><td>2 </td><td>1 </td></tr>\n", | |
| "\t<tr><td>2009 </td><td>1.774869</td><td>2 </td><td>1 </td></tr>\n", | |
| "\t<tr><td>2010 </td><td>1.798701</td><td>2 </td><td>1 </td></tr>\n", | |
| "\t<tr><td>2011 </td><td>1.912409</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2012 </td><td>1.981132</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2013 </td><td>2.017857</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2014 </td><td>2.075000</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2015 </td><td>1.898810</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2016 </td><td>1.961702</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2017 </td><td>1.947368</td><td>2 </td><td>0 </td></tr>\n", | |
| "\t<tr><td>2018 </td><td>2.023810</td><td>2 </td><td>0 </td></tr>\n", | |
| "</tbody>\n", | |
| "</table>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{tabular}{r|llll}\n", | |
| " CirculationYear & mean.rating & median.rating & IQR.rating\\\\\n", | |
| "\\hline\n", | |
| "\t 2007 & 1.985612 & 2 & 0 \\\\\n", | |
| "\t 2008 & 1.807882 & 2 & 1 \\\\\n", | |
| "\t 2009 & 1.774869 & 2 & 1 \\\\\n", | |
| "\t 2010 & 1.798701 & 2 & 1 \\\\\n", | |
| "\t 2011 & 1.912409 & 2 & 0 \\\\\n", | |
| "\t 2012 & 1.981132 & 2 & 0 \\\\\n", | |
| "\t 2013 & 2.017857 & 2 & 0 \\\\\n", | |
| "\t 2014 & 2.075000 & 2 & 0 \\\\\n", | |
| "\t 2015 & 1.898810 & 2 & 0 \\\\\n", | |
| "\t 2016 & 1.961702 & 2 & 0 \\\\\n", | |
| "\t 2017 & 1.947368 & 2 & 0 \\\\\n", | |
| "\t 2018 & 2.023810 & 2 & 0 \\\\\n", | |
| "\\end{tabular}\n" | |
| ], | |
| "text/markdown": [ | |
| "\n", | |
| "CirculationYear | mean.rating | median.rating | IQR.rating | \n", | |
| "|---|---|---|---|---|---|---|---|---|---|---|---|\n", | |
| "| 2007 | 1.985612 | 2 | 0 | \n", | |
| "| 2008 | 1.807882 | 2 | 1 | \n", | |
| "| 2009 | 1.774869 | 2 | 1 | \n", | |
| "| 2010 | 1.798701 | 2 | 1 | \n", | |
| "| 2011 | 1.912409 | 2 | 0 | \n", | |
| "| 2012 | 1.981132 | 2 | 0 | \n", | |
| "| 2013 | 2.017857 | 2 | 0 | \n", | |
| "| 2014 | 2.075000 | 2 | 0 | \n", | |
| "| 2015 | 1.898810 | 2 | 0 | \n", | |
| "| 2016 | 1.961702 | 2 | 0 | \n", | |
| "| 2017 | 1.947368 | 2 | 0 | \n", | |
| "| 2018 | 2.023810 | 2 | 0 | \n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " CirculationYear mean.rating median.rating IQR.rating\n", | |
| "1 2007 1.985612 2 0 \n", | |
| "2 2008 1.807882 2 1 \n", | |
| "3 2009 1.774869 2 1 \n", | |
| "4 2010 1.798701 2 1 \n", | |
| "5 2011 1.912409 2 0 \n", | |
| "6 2012 1.981132 2 0 \n", | |
| "7 2013 2.017857 2 0 \n", | |
| "8 2014 2.075000 2 0 \n", | |
| "9 2015 1.898810 2 0 \n", | |
| "10 2016 1.961702 2 0 \n", | |
| "11 2017 1.947368 2 0 \n", | |
| "12 2018 2.023810 2 0 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0gAAANICAMAAADKOT/pAAAAPFBMVEUAAAAAAP8A/wBNTU1o\naGh8fHyMjIyampqnp6eysrK9vb3Hx8fQ0NDT09PZ2dnh4eHp6enw8PD/AAD////51B7lAAAA\nCXBIWXMAABJ0AAASdAHeZh94AAAgAElEQVR4nO3diXaqSABF0YoMIvIEmv//12ZUVMSBS1Gl\nZ6+VfiYBy4inBUQ0FYDFzNY3APgGhAQIEBIgQEiAACEBAoQECBASIEBIgAAhAQKEBAgQEiBA\nSIAAIQEChAQIEBIgQEiAACEBAoQECBASIEBIgAAhAQKEBAgQEiBASIAAIQEChAQIEBIgQEiA\nACEBAoQECBASIEBIgAAhAQKEBAgQEiBASIAAIQEChAQIEBIgQEiAACEBAoRkg+kFcfZsumdX\ntZ+6XhOnMxO/cK1YirvYBnORPJlu/opOwdUEo+uNHk9MSBZwF9swesCb0/x0T6/o0fXePycN\nExOSBdzFNgwP5SwwZv9k2peu6ObbMjEmfDYx1sRdbcP5IZ13l+r/FmG3lpft62/22fV0ZRKY\nICmq/puwXnPLqvMT0MT1ni8d4/pS2Mx6mbj/XfNPFtWD5d0sRT1ylA6/LQ9Rs611XOcO+H6E\nZMPtA755sHebNdGwt2A8XRGM1gKHb5JXQhqurp51KqSk+1lb0qnftup+O4wyta2FFxCSDeNn\npKAaiqj/7x+fN3Hi0XTDgzoYf2OyxyE1Ty7NFaR1CGVVJc13UyH19lfX2/52396cMpra1sIL\nCMkGc7ONZLoHfJXVF9KyXq1qOxmm63Iou8d2/U2Qt4/wsLrb7DEjzdNX/TRXXMYbJr6EFGRt\nMs23x+67Y3CetpmznNrWwgsIyYbxAz7vvm+3ivbDE0ByDqxqn6bKbq64/aaZtAwPRTUXUnbz\n82oqpPaazoO0sxy7b5ue9vMvcmEOIdlw+4A351S6f6ti/NC/TBzcpvMopEM5/Kg4JtH1Kt14\nZ8Pl32C4pu7bQ7/SR0sfIiQbhsd7lJTD91f/Xj+HjLJ7HlKdTh1O0O/hO4aj7Z7ZkMx1SFUy\ntFto//RfQUg23Dz+x4/p8zNSUN09WdzPOhVSu6+u29nWrKeF+zR/IaSbZ6R6le/Y7fJjt91H\nCMmGRyHFj7aRLmtY0ZNtpPbfoN0v0e5syC4/nw3pZhupk+1vbylexN1mw6OQpvfaNTvUTu0/\n0dReu/L+ek79rvL++9EzUlk9CGnYa9evBobnrbZgpfvgyxGSDY9CuryA2r22Y65fR2p3aZ+/\nSftvkonr6Z/aova3WXBee2snngzp5nWkOumoaPc5zB5Vi0cIyYaHIZ1LunrDQ9b/sH1Mn4LR\nN/vrjZjz9RTd1tZp2GPQNjhMPB1SP0h0s7OBTaTPEJINj0OqN0uCqWPt6lWt4b1LzYF352/i\nq6NeL9eTdIc25HU8wT4vuu/i8ZbX3a7CZtoou6xmtt1xXMOHCMklG2zql2wVSRCSQzKLIZlu\nR18eDQfMYhFCcka7lWLtQX3ZzWE4mkGAkJwRW31Qn983wW46CUJyRh1SaPF9deWhKTfg6DoN\nQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUIC\nBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQI\nCRCwEJIBPPPBo1wfzgZDwDleL3VCgiu8XuqEBFd4vdQJCa7weqkTElzh9VInJLjC66VOSHCF\n10udkOAKr5c6IcEVXi91qyGdDnH7InCcnNYaAv7yeqlbDKkMRwdURKsMAZ95vdQthpSY4Ji3\nl4osMMkaQ8BnXi91iyEFJj9fzk2wxhDwmddL3WJIVwfIzh8t6/Vdig95vdR5
RoIrvF7qdreR\nsqK9xDYSJni91G3u/o5Ge+3CcpUh4DGvl7rd15GS9nWkID7wOhLueL3UObIBrvB6qRMSXPHq\nUg/itN/WTuPZfVY3V98fCnC3NpR1v339miav3cosDg4B51wv9f8G99MZs28v7N8658h5+/ym\npNB0v33z1t5eu5VZJq6E15Fw4/WQwu6JKAjfC6n9J7k9PG1pQv21WJll4krurmV8aqN//6qK\nr1/7Mjff//df93U3Xf34yOt/c3M/zyvXfzvPO9fx8ItVO7ji9WekzKT1v6k59v8/TkMTpO2l\nLDYmSLqpitgEh/Fs43/PE/Ynpeu+LrMkzUudbzxZERJc8XpIpYnrf2NTdI/0+PyGgkO3RtMF\nEjQXD6PZ2n+SmwmvQjrP0r7muSckeOj1kLodBCbo4shMVFZlZLLmV8eq6p6nTPPT1ISj2Xp5\ndTNhf6WjWTIT5FUeuB1SGpgwXXcI+OiNkBJzqk5m30UQm+Ywme5Zapig+c+pqsYrZ8Pu7/zq\nmq5DGmaJmyrrnNwMKY+bNdkDb+zDpDdCOtbrX4f6SaUvYXT67SI7RDd9XGarqjDI+m8nJry9\n5Og2Ut6tlZp9WdXbdLPPSYT0i94Iqaj/TxyZ4j6k6HxpOqSTMd2LuVMTehLSvtm0S7r3T5Sj\ndVfhEPDZq0u9eXgHpmweSaMSWvt6qyErZkKqV9rihxN6ElJ/M+PRN+oh4LN3Qto3qzbVsI2U\njX9VzYaUDzsbJia8XHJ6G6m7VcdunY439uHWOyEdTbvfrX1MHZtdbFXa/C+62V2Qz20jDU9J\nVxMW1W1ITu+12zdbR51yzxv7cOudkOonk+bx3z3Su+2doGi2HM6H0z0KqWyfkkYThmZYSRzN\nEl02u7Q3ftksrfISuJl/QiKkn/ROSPVGUlCd40jrGPbtToR9c3h31j05VdVUSHVD8dWEp3Ai\npObIhujkZkjNjesvBLPPR4T0m9xb6k9epbma9INrf38WB4eAcxxa6u0GWBnPb4Bcz/HBIO/P\n4uAQcI5DS70/FO+dtw2+PwghYRUuLfU0MiZ8/fmIkOAOr5c6IcEVXi91QoIrvF7qhARXeL3U\nCQmu8HqpExJc4fVSJyS44tWl/vwEkaN3QjweTXu+SEKCK14/1u7ZCSLfCEl1vkhCgivul/rf\n39/UdE9PEPlKBuLzRRISXHG71P8699OZpH1zXl7/uziku2kJCZ57PaTHJ4i8nNixO1OXtfNF\nEhJccbPU//4elDRzgsjuDXnxkILF80USElzxekgPTxB5vLxFvPuydr5IQoIr3gjp0Qki43Yf\nXHa9167vY93zRRISXPH6NtLDE0SOTqPVXbR2vkhCgiveCOnRCSJvQ7J3vkhCgitefx3p4Qki\nb0KyeL5IQoIrXl3qXSKTJ4jsLp4uOxsqW+eLJCS44p2QHpwgMrvda2ftfJGEBFe8E9KDE0R2\nLynthwwsni+SkOCKd0J6dILI6nB1ZIO980USElzh/FKfO18kIcEV7i71F84XSUhwhbtL/YXz\nRRISXOHwUn9+vkhCgiu8XuqEBFd4vdQJCa7weqkTElzh9VInJLjC66VOSHCF10udkOAKr5c6\nIcEVE0t9NzldP2GZhM2rO+Xw0wdnTu0n3pvzgQnik6x212llFgeHgHPul/puN1VS/0g/Dm+M\nTfufTp85tRePTiEkPsnqgxu/xiwODgHnvBdS3VFSVFWR9CX1j/+7M6eeZypurkB2ktVuXiuz\nODgEnHO31He7yZLax3sZDO+LzUx7FqGhggc1jH/8aFJCwje4Xeq7uZDSy6HYSbvONllHGpow\nrYaVuasrOP/7+klWs6jesMqqSYQEV0yHdF9Sf6KG8wnpTu0q2tT6WjScgnUypHdPspqON8me\n3fgP/t4VENIvulnqu92Dkm5PHXRzUq7LGR+H864e76b/6CSrQTP9cXSy1pkb/8
HfuwZC+kWS\nkMZnTh1O/hPdTX836V1IEydZNebBat3EjX8FIWEV10t9t3tU0qOQqvGZU6vLNNPTv3+S1cSY\nOB/FN3PjX0JIWMWrS/12Gynvz6hVjc6cepnwUUgfnGT1EAwnK/r8xi+bxcEh4Jy3Qur32uVF\n80yRDT8dzpw6mvBRSB+dZDVLQraR4Li3QupfR6pr6Df/u4d6Pt7ZMGzfnJ+wxlfw6UlWH71O\n9eKNXzaLg0PAOW+F1Dy2myMbDsPKVv8AHz8lPd5rN5r09ZOsht3+PZ6R4Lb3QmpKGh9sd3Xm\n1N75daTJkN49yepx7lg+QoIr3gxpOPo7i67W3ZLxVlIadEc2TIb09klW2yMbHhxdTkhwxcdL\nPTs8n2ahuZOsdhN8cJ0f3RLXhoBznFzqL5xktZvug6v+6Aa5NgSc4+RSf+Ekqy1CgivcXOrP\nT7LaIiS4wuulTkhwhddLnZDgCq+XOiHBFV4vdUKCK7xe6oQEV3i91AkJrvB6qRMSXOH1Uick\nuMLrpU5IcIXXS52Q4AqvlzohwRVeL3VCgiu8XuqEBFcYr33w9+rvwg2GAJTsh5SGxsQz534V\nDAHYZjGk7tmvPynL/DulCAmesR1SYprP/CySBx+OsXAIYCO2QwraT1erygen2Vs4BLAR2yE9\n+YDChUMAG7Ed0n4Iafa0LIQEz1gNKT6kWXsi5jKZ39tASPCM1ZDOr1wZE5RrDAFsxObrSHme\npnHc7nJIZjsiJPiGIxsAAUICBAgJENgqJF5HwldxJ6TxIen//lUVX3x59MWqHSBASIAAIQEC\nNkMqm4+97d/Sx84GfBWLIZVBuyOh+8xpQsJXsRhS+2a+Mg3az4cmJHwViyEF3YxFEBaEhC9j\n/ZwN9ZNSFBESvozFkEIzHPIdRoSE72IxpNTs+0uFiQgJX8Xm7u/kXE/25MyUhATPWH1BNo+H\nS8WekPBNOLIBECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQ\nAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAAB\nQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUIC\nBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQI\nCcv8N9j6hmyLkLAMIbUICcsQUouQsAwhtQgJyxBSi5CwDCG1rIZ0OsSmESentYaAbYTUshhS\nGZqLaJUhYB8htSyGlJjgmLeXiiwwyRpDwD5CalkMKTD5+XJugjWGADZiMSRjHn0jGwJYzfxT\nL89IaLCC9pQzIdXbSFnRXmIbyTmE9JQzIVXRaK9dWK4yBD5ESE+5E1J1StrXkYL4wOtIjiGk\npxwKafEQf39/n17l57P+xJzNA6Se9cOQPPpDP59zuIf8D+mv88kVfj7rj8zZPkj8ubmb3EX9\nnN6HNP+HzPNqiRGSk3O6GdIHryP9/X18H3w+66/M+Z9fN9e9Od0JabRLz/z7V1X3X39/3dfU\n7559fTrvMN/fl4/539/y2/v2cvmmMT1ateMZacU5Pbu57s3pUUhsIz3FXbTZnIT0TXP+zF20\n4A/9gpDKvTFR1l/J+zsb5vfjP/PZfefZnD9zFy36Q1e5tTbf2Bd0b4/truSjkHjxfd7P3EXu\n/aFWD1pN65rSoH1zLCGt4Wfuos//0LXuIqtvo2j/KYKw4P1I6yCkFeect8Eb+8ooIiQs8tMh\nhWZ460QYERKW+OmQUrPvLxUmIiQs8NMhVcm5nswQEhb47ZCqPB4uFXtCeuhndhls4StCcmkI\nhxHSigjpdxDSigjpd/xKSF/1dxKSe77qATbjq/5OQnLPVz3AZnzV30lI7vmqB9iM
r/o7Cck9\nX/UAm/FVfychueerHmAzvurvJCRshZBWR0i/gJBWR0i/gJBWR0jwDCEBAoQECBASIEBIgAAh\nAQKEBAgQEiDwEyF91St/cBIhAQKEtBbq/SmEtBZC+imEtBZC+imEtBZC+imEtNKchPRbCGml\nOQnptywMyVxEiexGERJ8owvJmGDLW7UOQsJrlq7a7YPmY8qzwJyq2MiekwgJnlkYUmLy9t/c\nRFVpQs1t+o6Q8FMWr9qNLsx/eNinQ2yKkPCahSEF52ekgJDwwxav2g3bSEl1rFfvtrtV6yAk\nvGbpzoZo2PndPCGlG94qYEuLX5DN4jqjuHlaMgfNTbobAnDeTxzZAKyNkAABQgIEloZ0CIcD\nhFS36G4IwH0LQzpcjrST3aSKkOCdxS/IynZ5PxoC8IDqECEtQoJnFoYUm1J2Ux4MAXhgYUhF\nEJ1kt2V6CMADujf2yW5SRUjwDiEBArwgCwgQEiCwIKTuTbGs2gGEBEiwagcIEBIgoDpEKNCd\nHbIiJHhHFFLBNhJ+2oKQsqvzFctODvnhrQK2tOQZKRx3JD3kjpDgGd5GAQiw1w4QUIV0ipfe\nkqdDAO5aGlLCkQ2A4Nzfg0x2kypCgncWn/zkWEWmKCLDXjv8MsFeu0P9bJTrPonidgjAA4KQ\nsuaUXGwj4actPovQsSpMWJ0ICT9tYUhZE1D7GUl72U2qCAneWXzu7+a7vdF9oPn9EID7OLIB\nELAfUhr2n/C33hCAbaqQ8ueHCHX7I/oPnZ1fFSQkeGZJSKc6iihvLuXxC3vt2kkSk5RVVSTz\nn2JBSPDMgpBO3XNLXhXx06eYdr5mxqA76345/0ZAQoJnFoQUNfEkJmreKRu/8KEUbUjDM9f8\nMxghwTMLz2vX/Dcwcf7SfN2e8v6b2ZOlEBI8Iwjp1XeZ189bhzQzx/pimcyvChISPCMI6eX5\nLm9cqp/GZtcFCQmesRhSledpGsftLodkfpuKkOAZmyG9PwTgiUUhXdn4VgFbIiRAYKuDVnkd\nCV/FnZDGz27//lUVX3x59MXbKAABQgIECAkQICRAgJAAAUICBCyG9MYLuIQEz1gMKSUkfK3F\n57ULXwmjkwevniCckOCZhSEd3jrWLn/1PJKEBM8s/liX2bMB3UrNS29KJyT4RvBpFCsgJHhm\n8adRvHD6oGVDAB5YGFIRRNKP6psYAvDA4lU73tgHEBIgwSFCgAAhAQKqkE7PP9Zl6RCAu5aG\nlLCNBCwO6dLRk8/g+3wIwAOLDxE6VpEpishIX04iJHhGcIjQoX42ys2rB3a/PQTgAUFIWXPg\nKttI+GmLj7U7VoUJm4/BlN2kipDgnYUhZU1A7QeV72U3qSIkeGfxO2Sb7/avfBbzx0MA7uPI\nBkCAkACBxSFlcbOZFBei2zM1BOC8pSFF3dFBJpCWREjwzMKQUhOVTUgpe+3w0xYfIlR2r8Xy\nOhJ+muDIBkICFoYU9s9IuQllN6kiJHhHs42UvXmiyHeGADywdK9d3L8dSXrwNyHBN5LXkUx8\nFN2cySEA53FkAyBASIAAIQECC0IKXv8EPgu3CtjSgpBiQgJ6C0JKTZgctUd93w4BeGJBSMW+\nWbkL9ivEREjwzLKdDXnart/JYyIkeGb5XrvToX1PUqC5PZNDAK6T7P4uE3Y24LfxjAQIsI0E\nCCzea7fKLnBCgmcWvo6UldJbczsE4AmObAAEONYOEODob0CAkAABQgIECAkQICRAgJAAAUIC\nBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQI\nCRAgJECAkAABQgIECAkQsBrS6dB9gkWcnNYaAtiExZDKcPTZFdEqQwAbsRhSYoJj3l4qssAk\nawwBbMRiSIHJz5fz+Q9vJiR4xmJIV59FNv/BZIQEz/CMBAjY3UbKug9AZxsJ38bm7u
9otNcu\nnP049DVC2nk1Jzxj93WkpH0dKYgP9l9H2u0+fVRvMSd88zNHNhAS1vQrIe12nz6qt5gT3iEk\nF+eEd7YKyfLrSLvdp4/qLeaEf9wJabRLz/z7V1Xar92u+3p73k/nWzImX/59/caq3W736dPD\nFnPCQ4Tk3pzw0E+EtNt9+qjeYk746CdCAtZGSIAAIQECVt+PdGWNIYCNWAwpJSR8LZurdnkw\nf8oTwRDANqxuI+Xzb+dTDAFswu7OhnT0bvOVhgC2wF47QICQAAFCAgQICRAgJECAkAABQgIE\nCAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJ\nECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAg\nJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRA\ngJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBOyHlIbGxNmq\nQwC2WQzJtDNGppWsMgSwEdshJSYpq6pITLrGEMBGbIcUmLK5XJpwjSGAjdgOyZjRN/IhgI3Y\nDmk/hBSsMQSwEashxYc0M8f6YpnM720gJHjGakid9mJQrjEEsBGbryPleZrGcbvLIZntiJDg\nG45sAAQICRAgJEBgq5B4HQlfxZ2QzMi/f1XFF18efbFqBwgQEiBASICA1ZBOh7jdBIqT01pD\nAJuwGFIZjnYnRKsMAWzEYkiJCY55e6nIAg5axVexGFJg8vPlnLdR4KtYP2fD1DeyIYCN8IwE\nCNjdRsqK9hLbSPg2Nnd/R6O9diFv7MM3sfs6UtK+jhTEB15HwnfhyAZAgJAAAUICBAgJECAk\nQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECA\nkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAA\nAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFC\nAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJEDAakinQ2wa\ncXJaawhgExZDKkNzEa0yBLARiyElJjjm7aUiC0yyxhDARiyGFJj8fDk3wRpDABuxGJIxj76R\nDQFshGckQMDuNlJWtJfYRsK3sbn7OxrttQvLj4bYfTq2bz7/QxfcRVsM+i1z2n0dKWlfRwri\nw4evI+12P1LS53/ogrtoi0G/Zk6/jmwgpBXndO2h6decXoW02/1ISZ//oQvuoi0G/Z45/Qvp\nF0oiJHfnfDDrViF98jrS7ldC+vwPXXAXbTHoF83pTkijXXrm37+quv/a7bqvqd991deCv3PJ\nfdTOy5ifjenRqt3ubP0bsKnP/84F99AWg3o65+SshOQeQnJ7Ts9D2u3m/5Kv8fnfueAe2mJQ\nb+ecmpU39gECvLEPEOCNfYAAb6MABHhjHyDAMxIgwBv7AAHP3tgHuMmvN/YBjvLnyAbAYYQE\nCBASIEBIgAAhAQKEBAgQEiBASIAAIQEChAQIOBoS4JkPHuX6cGzY4mb/ypg/84dKxyQkxnRh\nUO/HJCTGdGFQ78ckJMZ0YVDvxyQkxnRhUO/HJCTGdGFQ78ckJMZ0YVDvxyQkxnRhUO/HJCTG\ndGFQ78ckJMZ0YVDvxyQkxnRhUO/HJCTGdGFQ78f0NCTALYQECBASIEBIgAAhAQKEBAgQEiBA\nSIAAIQEChAQIEBIgQEiAACEBAoQECBASIEBIgIAPIaWhCZKyvZgEExfL0U/XHbP++XCH
bTHm\naAJ7g9ZO4kfJ0zHzvTH7wuqYSx9EHoSUtJ8PEDR/Y9ReDKuri0XQTaC846fHrJfw8EEFVz+1\nNOZoAnuD1spA+yh5Omam/0Ofjbn4QeR+SLnZl83/OPbN/xmDvMoDc7q6uDdJ1dxT+7XHrJp/\nuzvs6qeWxhxNYG/QRvzJx5wsGTOof1rG7WK1NebiB5H7IcXdTWz+4MRk9aWjOVxdNJcJ1h2z\nXhBRP8r4p7bGjPV/5/NB2x9pQ3o65rF9UJcmsDjm4geR+yH1mj8xNs0zb27iq4v9iofyfp8e\ns6oX8PlBffmprTFHE9gctBhHZWXMvclXGG9+zMUPIl9CKk109b+N0cVD/6wsfHaYHrPKb//P\npX+EzYw5msDmoJEp1ghpZszQVIegXRezN+biB5EvIaXNE/J0SFXabCgG6epjjv5dLaSZMUcT\nWBz0YI6rPAnO3rlxu+Fvc8zFDyJPQiqC9pl4OqRDu8NF/oR0N2Z19638ETY35mgCe4O2q0Ar\nhDR/5zY7G/byJTp75y59EPkRUhm06zPTIaXNs3J9v4ufku7HvP9W/QibHXM0gb1Bw2aP8Qpr\nsPN3brONVGhfXHgy5uIHkR8hRd19Glzug9HF0DRr06X6fr8fc/RvsE5Is2OOJrA26L5dkdSH\nNPuHrvR/qdkxFz+IfAipCKPudbJuh0tx2WtXjFY8tPf71JijUW5+amXM0QTWBjVn9sZcZT//\nszF/Yfd3dt5PdWj//5g1z8Kji93/YqQvO0yP2Tjv5Ln6qZUxRxNYG3SVkF67cwvpX/tszMUP\nIvdDGt2h00c2JKY5RCpRPqinx2z09/sKRzY8HVP7yHpx0LvLFsast47KZnvlaHHMxQ8i90Pa\nj/6fGLYX2vtkdDG6XFx3zOrymArtjzmewNqgd5dtjHnYYIEufRC5H9J45aI7RLf98ehiNbq4\n6pjV5X4v7Y+5xlrW8z/09rKVMbPI+gJd+iByPyTAA4QECBASIEBIgAAhAQKEBAgQEiBASIAA\nIQEChAQIEBIgQEiAACEBAoQECBASIEBIgAAhAQKEBAgQEiBASIAAIQEChAQIEBIgQEiAACEB\nAoQECBASIEBIgAAhAQKEBAgQEiBASIAAIQEChLSVfB+YfdZefPKJeLe/zp7Ps7983nps9p/d\nQLyDkDaSdB/FGDafVf9eSKGZ+OGt0KTdhdSEn99IvIyQtnEwQf3EUtb/FE+nvWnmpU90zY1p\nr7gwJv/oBuI9hLSJYgho/8KK1ych1aX2n/h+ePvG4QOEtIlkeHyXcdqlYUwZths2SWCiy/pe\n96v6Qhab7lO3+w/n7n6dhiZMu+mK2ATjaNqEDsMH3tcTBt3K3vmKLkNiOULaRHS1wtWFUT/A\nk+Y3taC8DenQbWVOoLEAAAIESURBVFIl1yG1E7et1PM0F0cl1St1Vdmv4FXxecLLFZ2HhAAh\nbeJ69awLI6rrqY7NP/sumMuvmq9j80tTjX9xNEFe5UHzq3b26x0LqTkk/S6HrPltGZns5ora\nIaFASJuYCunUXIybf0oT3K3aXWa7/CJuymgqGWa/vtrIDCt2sWmKKS/rcf0VnVb4034UIW1i\nKqSbX9yGVGSH6Cak0TyXH17UK3blcF29iSuCBPflJuLzNlJWvhZSdC7h5ZAu345Dur0iSHBf\nbuIw7BY4NZs1L4S0N2GaFZ+HdP7Z3RVBgvtyE+fXkSKTXkURXW8jnS47G6puR9zUNlL8LKRu\nwsvPCEmO+3Ib+/bIhua1n+oqpLTZk5Y0e+2ag3zK6BLSqcqHTZvhZaarvXZV9TikdsL6uuOb\nK7L2534/7suNRDfH2g2P6vPrSGlzIR5+1R+a1zxfheb8hDV+HamqHofUT9g8C46uiJCEuC+3\ncqwjiY7txat93PUjPW5X+w6B2V9+ta+nPrVrcafwsnc8Dc5HNlz+ezb6Nq3z27dXe7kiQh
Li\nvgQECAkQICRAgJAAAUICBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUIC\nBAgJECAkQICQAAFCAgQICRAgJECAkAABQgIECAkQICRAgJAAAUICBAgJECAkQOB/YSUxXm7G\nQMoAAAAASUVORK5CYII=", | |
| "text/plain": [ | |
| "Plot with title \"Project Ratings\"" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# (b)\n", | |
| "\n", | |
| "# Create a circulation year column\n", | |
| "df_q1$CirculationYear <- format(df_q1$CirculationDate, \"%Y\")\n", | |
| "\n", | |
| "# Are there rows with NAs in the Revised Completion Date column?\n", | |
| "sum(is.na(df_q1$RevisedCompletionDate)) # No\n", | |
| "\n", | |
| "# The simplest way to do it probably involves a for loop in which you iterate through\n", | |
| "# circulation years. I'm providing a fancier solution so that you can get used to \n", | |
| "# reading more complex R code. The dplyr library provides a data processing pipeline\n", | |
| "# that is very similar to data analysis in SQL. DataCamp has tons of resources on it.\n", | |
| "\n", | |
| "library(dplyr) \n", | |
| "RatingsByYear <- df_q1 %>% # %>% means \"pass what is to the left to the function that follows\"\n", | |
| " group_by(CirculationYear) %>% \n", | |
| " summarise(mean.rating = mean(Rating), \n", | |
| " median.rating = median(Rating),\n", | |
| " IQR.rating = quantile(Rating, 0.75) - quantile(Rating, 0.25))\n", | |
| "RatingsByYear\n", | |
| "\n", | |
| "# Not the prettiest plot, but you get the point.\n", | |
| "plot(RatingsByYear$CirculationYear, RatingsByYear$mean.rating, \n", | |
| " pch=15, col=\"red\", ylim=c(0, 3), \n", | |
| " xlab=\"Circulation Year\", ylab=\"Mean Rating\", main=\"Project Ratings\")\n", | |
| "points(RatingsByYear$CirculationYear, RatingsByYear$median.rating, pch=16, col=\"blue\")\n", | |
| "points(RatingsByYear$CirculationYear, RatingsByYear$IQR.rating, pch=17, col=\"green\")\n", | |
| "legend(\"topright\", pch=c(15,16,17), \n", | |
| " col=c(\"red\",\"blue\",\"green\"), \n", | |
| " legend=c(\"Mean Rating\", \"Median Rating\", \"IQR of Ratings\"))\n", | |
| "grid(nx=NA, ny=NULL)\n", | |
| "\n", | |
| "# RESPONSE:\n", | |
| "# Project ratings have fluctuated slightly over the years. Starting with an average of 1.98 in\n", | |
| "# 2007, it decreased to 1.77 in 2009 before continuously increasing to 2.07 in 2014. The average went\n", | |
| "# down to 2.02 in 2018. The median rating is 2 throughout the years, and the IQR is zero for almost every year, \n", | |
| "# with 2008-2010 being exceptions with an IQR of 1." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 88, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.315037Z", | |
| "start_time": "2020-01-03T03:46:15.109Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Time difference of 1210.697 days" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Time difference of 1118 days" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Time differences in days\n", | |
| " 0% 25% 50% 75% 100% \n", | |
| " 56.00 836.75 1118.00 1477.00 4395.00 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "640.25" | |
| ], | |
| "text/latex": [ | |
| "640.25" | |
| ], | |
| "text/markdown": [ | |
| "640.25" | |
| ], | |
| "text/plain": [ | |
| "[1] 640.25" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table>\n", | |
| "<thead><tr><th scope=col>CirculationYear</th><th scope=col>mean.duration</th><th scope=col>median.duration</th><th scope=col>IQR.duration</th></tr></thead>\n", | |
| "<tbody>\n", | |
| "\t<tr><td>2007 </td><td>1124.094 days</td><td>1106.0 days </td><td>548.50 days </td></tr>\n", | |
| "\t<tr><td>2008 </td><td>1194.404 days</td><td>1118.0 days </td><td>515.00 days </td></tr>\n", | |
| "\t<tr><td>2009 </td><td>1276.963 days</td><td>1152.0 days </td><td>636.50 days </td></tr>\n", | |
| "\t<tr><td>2010 </td><td>1297.779 days</td><td>1150.0 days </td><td>620.50 days </td></tr>\n", | |
| "\t<tr><td>2011 </td><td>1188.139 days</td><td>1115.0 days </td><td>652.00 days </td></tr>\n", | |
| "\t<tr><td>2012 </td><td>1089.484 days</td><td>1049.0 days </td><td>537.00 days </td></tr>\n", | |
| "\t<tr><td>2013 </td><td>1147.821 days</td><td>1099.0 days </td><td>687.50 days </td></tr>\n", | |
| "\t<tr><td>2014 </td><td>1167.700 days</td><td>1136.5 days </td><td>586.25 days </td></tr>\n", | |
| "\t<tr><td>2015 </td><td>1249.220 days</td><td>1153.0 days </td><td>763.25 days </td></tr>\n", | |
| "\t<tr><td>2016 </td><td>1227.009 days</td><td>1122.0 days </td><td>644.00 days </td></tr>\n", | |
| "\t<tr><td>2017 </td><td>1261.437 days</td><td>1146.0 days </td><td>654.25 days </td></tr>\n", | |
| "\t<tr><td>2018 </td><td>1319.476 days</td><td>1123.5 days </td><td>639.00 days </td></tr>\n", | |
| "</tbody>\n", | |
| "</table>\n" | |
| ], | |
| "text/latex": [ | |
| "\\begin{tabular}{r|llll}\n", | |
| " CirculationYear & mean.duration & median.duration & IQR.duration\\\\\n", | |
| "\\hline\n", | |
| "\t 2007 & 1124.094 days & 1106.0 days & 548.50 days \\\\\n", | |
| "\t 2008 & 1194.404 days & 1118.0 days & 515.00 days \\\\\n", | |
| "\t 2009 & 1276.963 days & 1152.0 days & 636.50 days \\\\\n", | |
| "\t 2010 & 1297.779 days & 1150.0 days & 620.50 days \\\\\n", | |
| "\t 2011 & 1188.139 days & 1115.0 days & 652.00 days \\\\\n", | |
| "\t 2012 & 1089.484 days & 1049.0 days & 537.00 days \\\\\n", | |
| "\t 2013 & 1147.821 days & 1099.0 days & 687.50 days \\\\\n", | |
| "\t 2014 & 1167.700 days & 1136.5 days & 586.25 days \\\\\n", | |
| "\t 2015 & 1249.220 days & 1153.0 days & 763.25 days \\\\\n", | |
| "\t 2016 & 1227.009 days & 1122.0 days & 644.00 days \\\\\n", | |
| "\t 2017 & 1261.437 days & 1146.0 days & 654.25 days \\\\\n", | |
| "\t 2018 & 1319.476 days & 1123.5 days & 639.00 days \\\\\n", | |
| "\\end{tabular}\n" | |
| ], | |
| "text/markdown": [ | |
| "\n", | |
| "CirculationYear | mean.duration | median.duration | IQR.duration | \n", | |
| "|---|---|---|---|---|---|---|---|---|---|---|---|\n", | |
| "| 2007 | 1124.094 days | 1106.0 days | 548.50 days | \n", | |
| "| 2008 | 1194.404 days | 1118.0 days | 515.00 days | \n", | |
| "| 2009 | 1276.963 days | 1152.0 days | 636.50 days | \n", | |
| "| 2010 | 1297.779 days | 1150.0 days | 620.50 days | \n", | |
| "| 2011 | 1188.139 days | 1115.0 days | 652.00 days | \n", | |
| "| 2012 | 1089.484 days | 1049.0 days | 537.00 days | \n", | |
| "| 2013 | 1147.821 days | 1099.0 days | 687.50 days | \n", | |
| "| 2014 | 1167.700 days | 1136.5 days | 586.25 days | \n", | |
| "| 2015 | 1249.220 days | 1153.0 days | 763.25 days | \n", | |
| "| 2016 | 1227.009 days | 1122.0 days | 644.00 days | \n", | |
| "| 2017 | 1261.437 days | 1146.0 days | 654.25 days | \n", | |
| "| 2018 | 1319.476 days | 1123.5 days | 639.00 days | \n", | |
| "\n", | |
| "\n" | |
| ], | |
| "text/plain": [ | |
| " CirculationYear mean.duration median.duration IQR.duration\n", | |
| "1 2007 1124.094 days 1106.0 days 548.50 days \n", | |
| "2 2008 1194.404 days 1118.0 days 515.00 days \n", | |
| "3 2009 1276.963 days 1152.0 days 636.50 days \n", | |
| "4 2010 1297.779 days 1150.0 days 620.50 days \n", | |
| "5 2011 1188.139 days 1115.0 days 652.00 days \n", | |
| "6 2012 1089.484 days 1049.0 days 537.00 days \n", | |
| "7 2013 1147.821 days 1099.0 days 687.50 days \n", | |
| "8 2014 1167.700 days 1136.5 days 586.25 days \n", | |
| "9 2015 1249.220 days 1153.0 days 763.25 days \n", | |
| "10 2016 1227.009 days 1122.0 days 644.00 days \n", | |
| "11 2017 1261.437 days 1146.0 days 654.25 days \n", | |
| "12 2018 1319.476 days 1123.5 days 639.00 days " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0gAAANICAMAAADKOT/pAAAAPFBMVEUAAAAAAP8A/wBNTU1o\naGh8fHyMjIyampqnp6eysrK9vb3Hx8fQ0NDT09PZ2dnh4eHp6enw8PD/AAD////51B7lAAAA\nCXBIWXMAABJ0AAASdAHeZh94AAAgAElEQVR4nO3di3aiOhhA4Uy5iEoFDu//roeACCoiJT8J\nCftbq1PbWsNYd+WSoqoBGFOuFwAIASEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJ\nEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJ\nEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACGtlyml\nsg9fO3397uabnz9qRWn+h+/6PnR/u0lWfl2i9xtZMBo63FHrtY/Qya/cou/363RIM21OfNf3\noUe3O1/o5I0Q0mLcUatd24fndepLSx6AH0NSt8Xf9f0KS293+kYIaTHuqNWS5smjWWea+tKq\nkNr3eaTU99XCxTfbf1jlcbPauPZG8B132FqlfmQ2D89+26PKmg8Svf50fwKo33+zX9PmUtxt\nrkyHVBeP7yzjbi0vPzUfnPLn61VZpKJ+u2dq6Nfb1Yuavy/SMM6wbO/L/74MefNr5FSsv/eC\nQ0hrNc9G5/r82KQpo8cWzseQkvFK1oeQ6sd3xt3TXf9N6fh6/Wi3j0O/3W7ePdW9h9SNM1q2\n9+V/W4as+5iSHghprebRW9XVY4Up6h+I+ceQLs0jtmoLTOu5Z6So7otoNsDS/na7R/H9ev1o\n0cehX29XL2r8tkiPccbL9rb878twZ7QSGhZCWinvni/S+96w5oEYFXWVTD5a7+/79cDnyOqn\na/bbSKp7YOth1KUJ9qzGa2bdw746talND/1yu2/DDiG140wvW/d+Yhmi5v3pZahj465Y6V5Q\nfv8tff+wis/68TgdUu9DSA9F93EbaPNovbRXyMZrZql+Nmw/Sj8N/TTWxLBDSPn79Z+vNLEM\n7YCENMJdsc5jnS7qHtPTzy9vIZXXLFFfQsrvH1fj93rfxvBdw5Wjzxtbrx9+CqmqPy7b46OJ\nZWDf3hPuinUuo0e+/nW9KKRr3H/L1He0kqyavIXnDkaDLw7p4zZS9+XJZXtdUkL6iLtinXj0\nWH7fNJkOSR/BjU+X4vXhWb999PSdj2eD6PH5aHp/wsQNDR9+3GvXfji9bO/PSNH7TUDjrljl\npsZu7Q7i6W2k6n7t+nEkZ3Znw+vH6adtpGHLZnrot9tNHnsKRov0+Pr0sj1Gm1gGQnrCXbFK\nNswNurQHcN53nemHa9R+7RY9PTz/9Iw0vdeuef6Ibu275NPQL7dzS4d95aNFeo1itGzDlt/0\nMhDSE+6KVdSwjV491rY6l/sH+kDtabwt084p6nZwLw9pOFDaHbS5f/4x2u3T0MPtPPS7AUeL\n9Bjnedn6G7l/dXoZCGmMu2KN6/hYZNo+O92G6QXdo1UfZSrvn+secv36YNQ+/heG9HgUP/1h\nQ65Go00OPdzOU0cvizQ8Yz0tW38jo8zel4GQxrgr1kjG2yj3Q7N69pvq/5oovT/oiuYhmVwf\nK07NR9GpKNvDP0tDqvNTNDXXLh5Gmxz6cf1WvzfwdZEe4zwtW38jj69OLgMhjXBX+IUH707x\nY/FKTkg7xY/FI+2c69T1UmAKIXkkHfYZYGcIySNNSPHkn7bDOUICBBASIICQAAGEBAggJEAA\nIQECCAkQQEiAAEICBBASIICQAAGEBAggJEAAIQECCAkQQEiAAEICBBASIICQAAGEBAggJEAA\nIQECCAkQQEiAAEICBBASIICQAAGEBAggJEAAIQECCAkQQEiAAEICBBASIMBC
SArwzIpHuXw4\nDoYAJBESIICQAAGEBAggJEAAIQECCAlY5L/e5FcJCViEkAABhAQIICRAACEBAggJEEBIgABC\nAgQQErA5QgIEEBIggJAAAYS0lfltUwSGkLZCSIdCSFshpEMhpK0Q0qEQ0lYI6VAIaSuEdCiE\ntBVCOhRC2gohHQohbYWQDoWQAAGEBAggJEAAIc1iQwfLENIsQsIyhDSLkLAMIc0iJCxDSLMI\nCcsQ0ixCwjKENIuQsAwhzSIkLENIswgpNFv9RAkJh0JIgABCAgQQEiCAkAABhAQIICSExsmx\nBUJCaAhpc4R0BEEd7SYkuEJImyOkIyCkzRHSERDS5gjpCAhpc4R0BIS0OUI6AkLaHCHBM4QE\nCCAkQIDVkG7nVGlpdttqCMAJiyFVsRokmwyBg9jffgqLIWUquhbtpTKPVLbFEDiIQ4cUqeJx\nuVDRFkPgIA4dklKfPhAbAgdx6JB4RoKUQ4fUbCPlZXuJbSSYOXRIdTLaaxdXmwyBYzh2SPUt\na48jRemZ40gwcfCQ9jQEfEZIuxkCkMQUIUAAU4QAAUwRAgRwQBYQwBQhQADPSIAApggBApgi\nBAhgihAg4BAzG/Y3oQSh2U9Io/U+9ftb14Jv//3+/nd/E71d3njr32yGVJ2USvI+m02GmMYz\nErZmc4pQ1E20626EkD7ybHGhWd39fWlqukTtNDtC+syzxYVm9YBs+66M4pKQ5ni2uNAcTBGq\nkoSQ5ni2uNAshhSr/iBsnBDSDM8WF5rFkC7qdL9UqoSQPvNscaHZ3P2dPerJFbO/PyMkD1k9\nIFuk/aXyREgfEZKH9jOzwfIQO0ZIHiIkQAAhAQIICRBASDDDFl3L6syGJ1sMAfsIqWX1gCwh\nBYiQWjZX7Ypo/vyqAkPAOkJq2T0gO3/uIIkhYBshtezubLiMTm230RCwjJBa7LWDGUJqERLM\nEFKLkGCGkFqEBJf+/fvnehFkENJXwfys9+dfx/ViSCCkL0x+1oE8RrZDSBsLIiSvHiVOtnT+\n/fPqPppFSPMMftZePUiOFNImAxLSvPU/a79+3R4npI2GJKR5hLQlF/cQIblhumZHSDPcPSGJ\nD0pIX7CzYVMuNpAIyY21dzoh7REhecibjExD8ug/yjYStmQSkldPvYSEvfIrJI4jYZ/82j25\nFUKCIULSCAmGCEnzKKTD7KH1DR3VhARzxwlp5n9JSDDnIiP7Y87+wiAk+Gh3010PE9Ix1j0O\nY3fTXQ8S0nFW44/BxZ5CQqr9C8mnZXWBkAyGMAjJs0MdXi2sE05+oGwjEVJw2NngZAi/QvJr\nad1wcw/NjHiMkPz6HU9IS+zs/vErpNV3nlePTELykE8hGT28fHpc0pF/DhOSTw7zHw2IRyEd\naY3nGP/LkBASety1BggJHe5cIx6FxI96U9y7RggJLZ7vzfgUEmvxGyIkM36FhHnHmY64O4QU\nkiP9pcnOEFJICMkZQgoJf43vDCGFhPPDOENIISEkZwgpJITkDCGFhJCcIaSQEJIzhAQIICRA\nACEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQEiCAkALDGUzcIKSg\ncE4tVwgpKITkCiGFhPMOO0NIISEkZwgpJITkDCEFhY5cIaSgEJIrhBQYMnKDkAABhAQIsBrS\n7ZwqLc1uWw0BOGExpCpWg2STIQBHLIaUqehatJfKPFLZFkMAjlgMKVLF43Khoi2GAByxGJJS\nnz4QGwJwhGckQIDdbaS8bC+xjYTQ2Nz9nYz22sXVJkMAbtg9jpS1x5Gi9MxxJISFmQ2AAEIC\nBDBFCBDAFCFAAFOEAAEckAUEMEUIEMAzEiCAKUKAAKYIAQKYIgQIYGYDIGA/IY3W+9Tvb13z\nxptHb0wRAgQwRQgQwBQhQAAHZAEBTBECBPCMBAhgihAggClCgACmCAEC9jOzwf
IQgCRCAgQQ\nEiCAkAABhAQIsDqz4ckWQwCOWAzpQkgIls1VuyKa/+MJgSEAN6xuIxXzE4MkhgCcsLuz4TKa\nt7rREIAL7LUDBBASIICQAAGEBAggJEAAIQECCAkQQEiAAEICBBASIICQAAGEBAggJEAAIQEC\nCAkQQEiAAEICBBASIICQAAGEBAggJEAAIQECCAkQQEiAAEICBBASIICQAAGmId2yRCmVZF9e\nptxkCGD/zEK6xo+XO4pzuYUiJPjGJKQyUcmlqJpL1e3cXC5dLhXgkkFIucqq0afLTIk9KRES\nPGMQUlq9fKE6mS7N6xCAJ9hrBwggJECAaUiXuNk6ilXM7m8cmmFIuWo+ivTub9GSCAmeMQwp\nUde6UHF9VYnYItWEBO8YhqSfkAqVdRfkEBI8IxBSqo8fERIOzXjVrshVVLNqh4Mz39mg1Fk/\nIUlOtSMk+MZ493ekt5Dq+Cq0PBNDAPvHAVlAgGFI8VluyveHIQAPGO+1U1u0REjwjGFI1fW0\nRUuEBM8IbCPdzrF0S4QEz8jsbCj0dLuL+dLMDAHsmUhIedKetkHumCwhwTPmIVXnSJ/6pGpq\nSmWWiZDgHePTcemdDVnRfUHs8U9I8IzpcaTmyejSn7tBT7qTQUjwjOlxpFR0jt3UEIAHTI8j\niS3IxyEAD0jNtbuJ7Wj4OASwX6YhZY9zFkst0dsQwP4ZhjR0xN8j4cgMQ4rUtU5UWSacRQiH\nJnDOhnPzbFTwp+Y4NIGQcj3Ljm0kHJphSGmzalequL4REg5N4kyr7ZRVsVeieB0C8IDp7u+z\n/uik2jOgyCEkeIaTnwACCAkQYBCSeuZ4qQCXCAkQYLpql0Z6btAtEt1pR0jwjfFcu+6PYwvZ\n3XaEBM8IzGx4viCCkOAZ40mr/TOS2J+Zvw4BeMB41S7S077zSL+2ixxCgmdMdzYk9312on8g\nS0jwjfEB2WuqMxI+BQohwTPMbAAEEBIgwCCk9PVcXJXYUVlCgmcMQspVNk6pzOROgEJI8IzJ\nql2ZqORS6Jiq27m5LPcKSYQEz5htI13jx5zVeMHT0e2cdvvKsy/nHCIkeMb41SgyfSQp+VaG\nVsWjueLzJx0iJHjG4l67TEXXbkJRmUfzk1wJCZ6xGFI/L0/7MjePkOAZiyE9TRCfny1OSPAM\nz0iAALvbSHm3h5xtJITG5hShZLTXLp59iTJCgmeszrW7Ze1xpCg9cxwJYWHSKiCAkAABxuf+\nfkxXWPCdTBFCqAxDOv/hBJFMEUK4jM8idFn8fUwRQrikzmu3AAdkES7jV+ybPR70/H3zBY7P\nI/77W9e88ebRm2FIZZQsfjlznpEQLuNVu+U7G5gihHBZDIkpQggXU4QAAcxsAASYn7JYr7Cl\nV6HFmRwC2D2pk+jPz1TolCcVnev6Eqvoy8uSERI8YxjSRbUvfZkvmeFQRbq4y5kpQgiPYUjx\n44XG4q/fl+ld3lmkTlVdZez+RlAsvvRlpLortju+OSCLoIg9I31/6Uulhn85ixDCYnEbKRqF\nVPGMhKBY3GvXbyPp17BgGwlhEXrpyyXHkdhrh3DZnNnAcSQEiylCcOrH9QIIMQhJ7zb40+zv\nTZcKXvr5CaQkQoJLhLQpQjqIn59QSiIkuPNDSP3l/oPo+8yGlUMgXD/hlCQUUsk2Ev7s5yec\nkgxCyscn0Fow+3vbpYJ/CKkzPgVxvPisXBstFbzz8xNQSRbPtLpyCMAD7LUDBEiFdEtNl+Tr\nEMB+mYaUMbMBMA5p6CgXW6SakOAd49dHutaJKstEsdcORyaw1+7cPBsVi05st2oIwAMCIeX6\nfA1sI+HQjF9o7FqXKq5vhIRDMwwp1wG1J0A5iS1STUjwjunu77P+6KTmTwpkNgSwf8xsAASY\nbiPJPhNNDQF4gEmrgADjc3/PvhbsWoQEzxiGVKWJ6JSGiSEADxiv2jFpFSAkQAS7
vwEBhAQI\nICRAANtIgABCAgTIrNrdEtFznxASfCO0jVTxZxQ4NKmdDaza4dCEQrooXo0CRya2s+Estkg1\nIcE7QiHFF7Eleh0C8AAHZAEBhAQIMAspP+nXSEoy6b9JIiR4xiSkMnnsakhKyYUiJPjGIKQq\nUnGu/9K8vMaye78JCb4xCCkbne87Yfd3AAJ4BUpnDEKK1bA+V3ISff8F8VqurhiEpJ4PKEks\nzesQsImQDBAS7gJ5eXFHCAmdH0IyQUjo/FCSCaOQnjheKpj5+aEkE4SEFiGZYa4dtJ8fSjJC\nSIAAQgIEEBIggJAAAYQECCAkQAAhAQIICRBASIAAQgIEEBIOZ4tZUIS0T8x4284m8wkJaZeY\nO7ohQjoOQtrONlPcCWmP+HOGDRHScRDSdjb6qytC2iH+wm6JlfcPIR0HIS2w8g7a6g+BCWl/\n+KPvJQhpH0PsGCEtsPIOMjw3xedvIqTvLD+gOQ/JAm7un5khCekrHtA75OY3zcyQhPQVIe2P\nm+fsuREJ6RvWsXbISUizQxLSN4S0P242I2eHJKQv2OxHZ75dQvqCkNCZfxYkpHnsiUZntDo5\n9WAgpHmEhEUIadZmB8IRGELaEE9kx0FIGyKk47Aa0u2cti/ul2a3rYbYE7atDsRiSFU8eqHM\nZJMh9oWQDsRiSJmKrkV7qcwjlW0xxK6wu+9ILIYUqeJxuVDRFkPsCiEdicWQnl74fP5V0EMI\niSNQh8Iz0lYI6VDsbiPlZXvpCNtIbiYowxWbu7+T0V67uNpkCMANu8eRsvY4UpSeD3EcCQfC\nzAZAACEBApgiBAhgihAggClCgAAOyAIC9jNFaLTep35/65o33jx64xkJEMAUIQhgGhRThGCO\nCYVMEYIAQmJmA8wxx52QIICQ7IZUZXpX3TlWKrluNAQc4M+uaqshlZFSdRUxRSg0hFRbDemk\n0qr551Q2TZ3Y/R0M/hJYszqzobr/06zlcUA2GISk2Z4iFKnRB+JDwD5OTtGyumpX1PW5mydU\nzW8kERI8YzGkQkVZUadRU1Ieq3yLIQBHbO7+zqNhitB5myEAN+wekL2e2r+STc/lZkMALjCz\nARBASIAAQgIEEBIggJAAAYQECCAkQAAhAQI8C+nQ8yKxY36FdPAZxtgv70KiJOyRVyEd/o9e\nsFs+hcSfj2G3PAyJkrA/HoXEXzQvwt3jhI8h8VCZwd3jhj8hjU6ywUPlM+4dN/wJCUvwe8YR\nQgoLITlCSEFh1dcVQgoKIblCSCFhb4wzhBQSQnKGkALCAQJ3CAkQQEiAAEICBBwnJDYbsKHD\nhMQGOLZESICAo4TEPmFs6iAhcXAF2zpUSJSErRwjJA74Y2OEBAg4REjMQcPWDhESsDVCAgQQ\nEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQ\nEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQ\nEiCAkAABhAQIICRAACEBAggJELDTkNzY/j+GUO00pO2H2M2oCAIhuR4VQSAk16MiCITkelQE\ngZBcj4ogEJLrUREEQnI9KoJgNaTbOW2P16TZbashTBASVrMYUhWPjn0mmwxhhpCwmsWQMhVd\ni/ZSmUcq22IIM4SE1SyGFKnicblQ0RZDmCEkrGYxpKe5bPMT2wgJnuEZyfWoCILdbaS8bC+x\njYTQ2Nz9nYz22sXVJkMYISSsZvc4UtYeR4rSM8eREBZmNrgeFUEgJNejIghMEXI9KoLg9xSh\nKL3c9wNe0tn96YsQElbze4pQU+SpvXCSOHUJIWE1Lw7I/td7u56Ku9uJYkKCS/uZIjQ+L9bv\nb12P3v77/f3v/vb0+eYtU8/vTd6U4ffzduA335+RcnVp3l/U9V7mJVbRpb2Up0pFWX
etMlXR\n+esC8oyE1byYIjQTUqXS5n2qyi6k9LEn49w9t2XttSJ98WtJhITVvJgiNBNSHevrNs9vbUi5\nSqq6SlSuv3St6+55SunPXlT8bfkICat5MUVoLqRM3eqbOnUhpUr32T1L9VfQ/9zqqa2yL6MC\ny3kxs2EupGuzynZunn3uyYzO4l3m5+T+2bomJGzK+5DKZosoUeV7SMnjEiFhc05C+vqY/kNI\ndaQqvQtwlEzrpOJLXhIS7PAipNkbOqlMT2+4byPlT2MQEiyxekB28YsR/SWkq2p30LU3eFVR\noY8qpd0ehoJtJFhiMaRbtElIzbNOs4l076TbMIpKfdSqcyMkWGBz1a5KVdIekRVdtWs2kqLh\nNi+xUqd2kJNSyS3vnpwWDUpIWM/uNtK1XQuTC0kWIWE1yzsbykSlFSEhONb32p1VlBMSQmN/\n93cRf/8jPEKCZ1wcRzoREkLjxRQhSwgJqxGS61ERBEJyPSqCQEiuR0UQCMn1qAgCIbkeFUHw\nO6TvZ1rVe9qXnvKOkLCa3yF9P9MqIcEKX0L69+/f1PW+nmn1LydgJSSs5kdI/zrv11NZe9bJ\nonlPSHDI95A+n2m1zvRZKB+rdgtOvEpIWM2LkP79+1DSzJlWuz+WTfuQlpx4lZCwmu8hfTzT\nanv6hiLqQ1py4lVCwmreh/TpTKtpe3rV/Hmv3fyJVwkJq3kR0sw20sczrd5DGS5+P/EqIWE1\n70P6dKbV15AWnHiVkLCaHyF9Po708UyrLyEtOfEqIWE1X0L6cD318Uyr3cXbsLOh/nriVULC\nav6H9OFMq/nrXrvvJ14lJKzmf0gfzrTaHVI69SEtOfEqIWE1/0P6dKbV+vw0s2HBiVcJCav5\nHVIIoyIIhOR6VASBkFyPiiAQkutREQRCcj0qgkBIrkdFEAjJ9agIAiG5HhVBICTXoyIIhOR6\nVATBm5B+Jq93v2KVxUrFWdV/tjtzw818VGAZX0L6+Zkq6R7Stf97vsv9s8MMVbNRgYVCCKnp\nKCvruszuJd3zyrqzCZmMCizkSUg/P5MltclUUf/nfLlqT37Sr/D95eSQk6MCSwUQ0qU9W10r\na89WR0iwzo+Qfn6mS7r/fXnRf3xr1+ZYtYN1AYQ0fuJ5PpdQ8fYNfxoVWMyLkH5+PpQ0G1Ly\nx44ICeuFGVLzTxzlr9f+46jAcj6E9PPzqaTXbaSiPaV++9lbe0oUg1GBP/AhpJnrjfbaFaXe\nv5D3n236SjcaFXgTQEj340hNONfuJSa6kAp2NsCeAELSx2H1zIbz/YR2/UbTn5+SCAmrhRCS\nLmk82e7+2eqvT0mEhNWCCKmf/Z0nw86GWm8x/e0piZCwmt8hvcknXhp2+1GBwELycFQEgZBc\nj4ogEJLrUREEQnI9KoJASK5HRRAIyfWoCAIhuR4VQSAk16MiCITkelQEgZBcj4ogEJLrUREE\nQnI9KoJASK5HRRAIyfWoCAIhuR4VQSAk16MiCDsNCfDMike5fDg2uFjso4x5mP+o6JiExJh7\nGNT7MQmJMfcwqPdjEhJj7mFQ78ckJMbcw6Dej0lIjLmHQb0fk5AYcw+Dej8mITHmHgb1fkxC\nYsw9DOr9mITEmHsY1PsxCYkx9zCo92MSEmPuYVDvx/Q0JGBfCAkQQEiAAEICBBASIICQAAGE\nBAggJEAAIQECCAkQQEiAAEICBBASIICQAAGEBAggJECADyFdYhVlVXsxiyYuVqPPbjtm8/n+\nDnMx5ugK9gZt3IQfJV/HLE5KnUqrY5o+iDwIKWtfHyDS/8ekvRjXTxfLqLuC5B0/PWbzE+5f\nqODps5bGHF3B3qCNKpJ9lHwdM5f/j34b0/hBtP+QCnWq9C+Ok/7NGBV1Eanb08WTymp9T522\nHrPW77s77OmzlsYcXcHeoFq65mVOTMaMms9WaftjtTWm8YNo/yGl3SLq/3Cm8ubSVZ2f
Lqrh\nCtuO2fwgkvso48/aGjOV/39+H7T9lGxIX8e8tg/qSkUWxzR+EO0/pDv9X0yVfuYtVPp08b7i\nIXm/T49ZNz/gx4N6+KytMUdXsDloOY7KypgnVWww3vyYxg8iX0KqVPL0a2N08Xx/VhZ8dpge\nsy5ef3PJP8JmxhxdweagiSq3CGlmzFjV56hdF7M3pvGDyJeQLvoJeTqk+qI3FKPL5mOO3m8W\n0syYoytYHPSsrps8Cc7euWm74W9zTOMHkSchlVH7TDwd0rnd4SL+hPQ2Zv32ofgjbG7M0RXs\nDdquAm0Q0vydq3c2nMR/orN3rumDyI+Qqqhdn5kO6aKflZv7Xfgp6X3M9w+lH2GzY46uYG/Q\nWO8x3mANdv7O1dtIpezBhS9jGj+I/Agp6e7TaLgPRhdjpdemK+n7/X3M0ftom5Bmxxxdwdqg\np3ZFUj6k2f/oRr+lZsc0fhD5EFIZJ91xsm6HSznstStHKx6y9/vUmKNRXj5rZczRFawNqh7s\njbnJfv5vYx5h93f+2E91bn8/5vpZeHSx+xUjethhekztsZPn6bNWxhxdwdqgm4S07M4tRf+3\n38Y0fhDtP6TRHTo9syFTeopUJvmgnh5Tu9/vG8xs+Dqm7CNr4aBvly2M2WwdVXp75WpxTOMH\n0f5DOo1+J8bthfY+GV1MhovbjlkPj6nY/pjjK1gb9O2yjTHPDn6gpg+i/Yc0Xrnopui2nx5d\nrEcXNx2zHu73yv6YW6xlff+Pvl62MmaeWP+Bmj6I9h8S4AFCAgQQEiCAkAABhAQIICRAACEB\nAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEB\nAggJEEBIgABCAgQQEiCAkAABhAQIICRAACEBAggJEEBIgABCcqU4ReqUtxe/vCLe65fz799z\nGl5vPVWndQuIvyAkR7LupRhj/Vr1fwspVhOffBWrS3fhouL1C4nFCMmNs4qaJ5aqeVd+ve5L\nM4te0bVQqr3hUqli1QLibwjJibIP6LRgxWtNSE2p91d8P/954bACITmR9Y/vKr10aShVxe2G\nTRapZFjf677UXMhT1b3q9v3FubsvX2IVX7rrlamKxtG0CZ37F7xvrhh1K3uPGxqGhDlCciJ5\nWuHqwmge4Jn+SiOqXkM6d5tU2XNI7ZXbVprv0RdHJTUrdXV1X8Gr08cVhxt6DAkBhOTE8+pZ\nF0bS1FNf9btTF8zwJf121V9U9fgLVxUVdRHpL7Xf/rxj4aLO2X2XQ66/WiUqf7mhdkhIICQn\npkK66Yupflep6G3Vbvi24QupLkNX0n/7880mql+xS5UuphrW4+43dNvgv3ZQhOTEVEgvX3gN\nqczPyUtIo+8ZPoHqAa0AAAFqSURBVDloVuyq/rbuJm4IIrgvnUgf20h5tSyk5FHC4pCGD8ch\nvd4QRHBfOnHudwvc9GbNgpBOKr7k5fqQHp97uyGI4L504nEcKVGXpyiS522k27Czoe52xE1t\nI6XfQuquOHyOkMRxX7pxamc26GM/9VNIF70nLdN77fQknyoZQrrVRb9p0x9metprV9efQ2qv\n2Nx2+nJD1v674eO+dCR5mWvXP6ofx5Eu+kLaf+k+NU8/X8Xq8YQ1Po5U159Dul9RPwuOboiQ\nBHFfunJtIkmu7cWnfdzNIz1tV/vOkToNXzo11761a3G3eNg7fokeMxuGfx9GH16a/E7tzQ43\nREiCuC8BAYQECCAkQAAhAQIICRBASIAAQgIEEBIggJAAAYQECCAkQAAhAQIICRBASIAAQgIE\nEBIggJAAAYQECCAkQAAhAQIICRBASIAAQgIEEBIggJAAAYQECCAkQAAhAQIICRBASIAAQgIE\n/A99313lCiTW2gAAAABJRU5ErkJggg==", | |
| "text/plain": [ | |
| "Plot with title \"Actual Project Duration\"" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# (c)\n", | |
| "\n", | |
| "# I did a year-by-year analysis, but this was not necessary.\n", | |
| "\n", | |
| "# Create an actual duration column\n", | |
| "df_q1$ActualDuration <- df_q1$RevisedCompletionDate - df_q1$ApprovalDate\n", | |
| "\n", | |
| "# Actual duration statistics\n", | |
| "mean(df_q1$ActualDuration) \n", | |
| "median(df_q1$ActualDuration)\n", | |
| "quantile(df_q1$ActualDuration)\n", | |
| "IQR(df_q1$ActualDuration) \n", | |
| "\n", | |
| "ActualDurationByYear <- df_q1 %>% \n", | |
| " group_by(CirculationYear) %>% \n", | |
| " summarise(mean.duration = mean(ActualDuration), \n", | |
| " median.duration = median(ActualDuration),\n", | |
| " IQR.duration = quantile(ActualDuration, 0.75) - quantile(ActualDuration, 0.25))\n", | |
| "ActualDurationByYear\n", | |
| "\n", | |
| "plot(ActualDurationByYear$CirculationYear, \n", | |
| " ActualDurationByYear$mean.duration, pch=15, col=\"red\", ylim=c(300, 1300),\n", | |
| " xlab=\"Circulation Year\", ylab=\"Duration (days)\", main=\"Actual Project Duration\")\n", | |
| "points(ActualDurationByYear$CirculationYear, \n", | |
| " ActualDurationByYear$median.duration, pch=16, col=\"blue\")\n", | |
| "points(ActualDurationByYear$CirculationYear, \n", | |
| " ActualDurationByYear$IQR.duration, pch=17, col=\"green\")\n", | |
| "legend(\"bottomleft\", pch=c(15,16,17), \n", | |
| " col=c(\"red\",\"blue\",\"green\"), \n", | |
| " legend=c(\"Mean\", \"Median\", \"IQR\"))\n", | |
| "grid(nx=NA, ny=NULL)\n", | |
| "\n", | |
| "# RESPONSE:\n", | |
| "\n", | |
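| "# As in 1(b), I summarise the statistics overall and by circulation year.\n", | |
| "# Overall, projects last about 1,211 days on average (median 1,118 days, IQR 640 days).\n", | |
| "# By circulation year, the mean ranges from 1,089 days (2012) to 1,319 days (2018),\n", | |
| "# the median from 1,049 days (2012) to 1,153 days (2015), and the IQR from 515 days\n", | |
| "# (2008) to 763 days (2015)." | |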
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Question 2" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 89, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.509517Z", | |
| "start_time": "2020-01-03T03:46:15.114Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df_q1$binDelay <- df_q1$Delay\n", | |
| "for (i in 1:nrow(df_q1)) {\n", | |
| " if (df_q1$Delay[i] <= 365) df_q1$binDelay[i] <- \"<1\"\n", | |
| " else if (df_q1$Delay[i] > 365 & df_q1$Delay[i] <= 2*365) df_q1$binDelay[i] <- \"1-2\"\n", | |
| " else if (df_q1$Delay[i] > 2*365 & df_q1$Delay[i] <= 3*365) df_q1$binDelay[i] <- \"2-3\"\n", | |
| " else df_q1$binDelay[i] <- \"3+\"\n", | |
| "}\n", | |
| "df_q1$binDelay <- as.factor(df_q1$binDelay)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 90, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.542404Z", | |
| "start_time": "2020-01-03T03:46:15.117Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[1] \"Distribution of Project Delays\"\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| " <1 1-2 2-3 3+ \n", | |
| "41.59960 30.03018 16.75050 11.61972 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# The most straightforward way is...\n", | |
| "print(\"Distribution of Project Delays\")\n", | |
| "prop.table(table(df_q1$binDelay)) * 100 \n", | |
| "\n", | |
| "# 41.6% of projects were delayed by less than a year. \n", | |
| "# 30.0% between 1 and 2 years, 16.8% between 2 and 3, \n", | |
| "# and 11.6% by more than 3 years \n", | |
| "# A large majority of projects are delayed by less than 2 years, \n", | |
| "# but a sizeable minority is delayed by more than 3 years." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Question 3" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 91, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.576343Z", | |
| "start_time": "2020-01-03T03:46:15.124Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[1] \"Distribution of Project Ratings (CDTA only)\"\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| " <1 1-2 2-3 3+ \n", | |
| "54.54545 29.38689 11.83932 4.22833 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Only CDTA projects\n", | |
| "df_CDTA <- df_q1[which(df_q1$Type == \"CDTA\"),]\n", | |
| "\n", | |
| "print(\"Distribution of Project Ratings (CDTA only)\")\n", | |
| "table(df_CDTA$binDelay) / length(df_CDTA$binDelay) * 100 \n", | |
| "\n", | |
| "# 54.5% of projects were delayed by less than a year. \n", | |
| "# 29.4% between 1 and 2 years, 11.8% between 2 and 3, \n", | |
| "# and 4.2% by more than 3 years.\n", | |
| "# CDTA projects seem to suffer from shorter delays than other project types. \n", | |
| "# A majority of projects is delayed by less than a year." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Question 4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 92, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.615234Z", | |
| "start_time": "2020-01-03T03:46:15.130Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| " <1 1-2 2-3 3+ \n", | |
| "0.5061567 1.1436184 2.3266129 2.8506248 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# There are several ways to do it. Here's a way:\n", | |
| "df_TopQuant <- df_q1[which(df_q1$RevisedAmount >= quantile(df_q1$RevisedAmount, 0.75)),]\n", | |
| "df_BottomQuant <- df_q1[which(df_q1$RevisedAmount <= quantile(df_q1$RevisedAmount, 0.25)),]\n", | |
| "\n", | |
| "# We need to take advantage of the coarsened variable we created above.\n", | |
| "(prop.table(table(df_TopQuant$binDelay)) * 100) /\n", | |
| " (prop.table(table(df_BottomQuant$binDelay)) * 100)\n", | |
| "# Projects with larger budgets are 50% less likely to be delayed by less than a year.\n", | |
| "# They are about 2.9x as likely to be delayed by more than 3 years when compared with lower-budget projects." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 93, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.645129Z", | |
| "start_time": "2020-01-03T03:46:15.138Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| " AED* AGD AGDX ARDD AWD AWD* BDCO \n", | |
| " 0 0 0 0 0 0 0 0 \n", | |
| " BPMS CPSO CRPN CTL CWRD DOC EARD ECRD** \n", | |
| " 0 0 0 -6 3 0 -41 -2 \n", | |
| " ERCD IDBD IED IFD IRDD IWD MKRD OAG \n", | |
| " -8 0 8 0 0 0 0 -1 \n", | |
| " OAI OCO OCRP OESD OGC OOMP OPO OPPP \n", | |
| " -2 0 -2 0 -4 0 0 0 \n", | |
| " OREI ORM PARD PPFD PSD PSG PSOD RMU \n", | |
| " -8 0 -8 -2 0 0 -11 0 \n", | |
| " SARD SDCC SERD SERD** SFSP-AUS SPD TD VPO1 \n", | |
| " -27 37 61 0 -2 -4 0 0 \n", | |
| " VPPC VPW \n", | |
| " 0 0 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Compare other features\n", | |
| "# I will look at counts instead of percentages because, given that there are \n", | |
| "# more categories, the percentages might be misleading (e.g., a difference of 100%\n", | |
| "# because there is only one project of a given type).\n", | |
| "# Dept\n", | |
| "table(df_TopQuant$Dept) - table(df_BottomQuant$Dept)\n", | |
| "# More projects in the top quartile are in the SDCC and SERD departments, whereas\n", | |
| "# more projects in the bottom quartile are in the SARD, EARD and PSOD departments." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 94, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.675049Z", | |
| "start_time": "2020-01-03T03:46:15.147Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| " 14 15 16 17 18 19 \n", | |
| "-1.4927042 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 \n", | |
| " 20 21 22 23 24 25 26 \n", | |
| " 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 \n", | |
| " 27 28 29 30 C001 C002 C003 \n", | |
| " 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.3584229 -0.3112130 \n", | |
| " C004 C005 C006 C007 C008 C009 C010 \n", | |
| " 0.0000000 0.0000000 0.7227471 0.0000000 0.0000000 0.5435357 0.0000000 \n", | |
| " C011 C012 C013 \n", | |
| " 0.1792115 0.0000000 0.0000000 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Cluster\n", | |
| "(table(df_TopQuant$Cluster) / length(df_TopQuant$Cluster) * 100) - \n", | |
| " (table(df_BottomQuant$Cluster) / length(df_BottomQuant$Cluster) * 100)\n", | |
| "# A few significant differences percentage-wise, but if you look at the counts you will\n", | |
| "# see that there's a lot of missing data here, so not very conclusive." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 95, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.703996Z", | |
| "start_time": "2020-01-03T03:46:15.151Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "\n", | |
| " AFG ARM AZE BAN BHU BRU \n", | |
| " 2.538466795 0.000000000 0.191013958 -1.685892299 -1.888708746 0.000000000 \n", | |
| " CAM COO FIJ FSM GEO IND \n", | |
| "-0.269904276 -0.519930676 -0.167408981 -0.693240901 -0.340719206 3.194126088 \n", | |
| " INO KAZ KGZ KIR KOR LAO \n", | |
| " 3.284818894 -0.161507737 -1.207270333 0.185112714 0.000000000 0.578943118 \n", | |
| " MAL MLD MON MYA NAU NEP \n", | |
| "-0.346620451 -1.380580558 0.255927645 0.059012442 -0.866551127 -2.731654895 \n", | |
| " PAK PAL PHI PNG PRC REG \n", | |
| "-0.766229975 0.005901244 1.534012908 0.387929160 -9.181714839 13.146108595 \n", | |
| " RMI SAM SIN SOL SRI TAJ \n", | |
| "-0.693240901 -0.161507737 0.000000000 -0.334817962 -0.484523211 0.202816446 \n", | |
| " TAP THA TIM TKM TON TUV \n", | |
| " 0.000000000 -0.472720722 -0.328916718 -0.173310225 -0.167408981 -0.346620451 \n", | |
| " UZB VAN VIE \n", | |
| "-1.201369089 -1.039861352 2.048042340 " | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Country\n", | |
| "(table(df_TopQuant$Country) / length(df_TopQuant$Country) * 100) - \n", | |
| " (table(df_BottomQuant$Country) / length(df_BottomQuant$Country) * 100)\n", | |
| "# Several differences here.\n", | |
| "\n", | |
| "# Hence, the budget seems to be heavily correlated with the delay. However,\n", | |
| "# it might be associated with some other project characteristics, such as country and\n", | |
| "# department. Nevertheless, we would not be able to draw causal conclusions given that\n", | |
| "# the data is observational and neither unobservable nor observable characteristics\n", | |
| "# are balanced across the \"treatment\" and control group." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Question 5\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 96, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-01-03T03:46:22.739911Z", | |
| "start_time": "2020-01-03T03:46:15.160Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# (5)\n", | |
| "# (a) decision problem or objective?\n", | |
| "# R: Maximize project ratings.\n", | |
| "# (b) lever or levers?\n", | |
| "# R: Budget allocation (we may want to consider levers which describe how the budget is spent as well)\n", | |
| "# (c) ideal RCT design?\n", | |
| "# R: Allocate budget randomly, which will in expectation make projects with more and less\n", | |
| "# budget comparable because their other characteristics will overlap, and then measure\n", | |
| "# the difference in ratings. \n", | |
| "# (d) dependent variable(s) and independent variable(s) in the modeler\n", | |
| "# R: The independent variable will be the budget allocated, whereas the dependent variable\n", | |
| "# will be the project rating.\n", | |
| "# (e) And---Why would running RCTs and modeling/optimizing over RCT results be preferable \n", | |
| "# to using (observational, non-RCT) \"foo\" data?\n", | |
| "# R: Because in observational data, projects with higher and lower budget allocations\n", | |
| "# are not guaranteed to be comparable in expectation. For example, such projects may be deployed\n", | |
| "# in different regions of the world, which might affect the difference in ratings. Hence,\n", | |
| "# we cannot infer that the difference in ratings is due to the difference in budget given that\n", | |
| "# there are confounding variables." | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "R [conda env:renv]", | |
| "language": "R", | |
| "name": "conda-env-renv-r" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": "r", | |
| "file_extension": ".r", | |
| "mimetype": "text/x-r-source", | |
| "name": "R", | |
| "pygments_lexer": "r", | |
| "version": "3.5.1" | |
| }, | |
| "toc": { | |
| "base_numbering": 1, | |
| "nav_menu": {}, | |
| "number_sections": true, | |
| "sideBar": true, | |
| "skip_h1_title": false, | |
| "title_cell": "Table of Contents", | |
| "title_sidebar": "Contents", | |
| "toc_cell": false, | |
| "toc_position": {}, | |
| "toc_section_display": true, | |
| "toc_window_display": false | |
| }, | |
| "varInspector": { | |
| "cols": { | |
| "lenName": 16, | |
| "lenType": 16, | |
| "lenVar": 40 | |
| }, | |
| "kernels_config": { | |
| "python": { | |
| "delete_cmd_postfix": "", | |
| "delete_cmd_prefix": "del ", | |
| "library": "var_list.py", | |
| "varRefreshCmd": "print(var_dic_list())" | |
| }, | |
| "r": { | |
| "delete_cmd_postfix": ") ", | |
| "delete_cmd_prefix": "rm(", | |
| "library": "var_list.r", | |
| "varRefreshCmd": "cat(var_dic_list()) " | |
| } | |
| }, | |
| "types_to_exclude": [ | |
| "module", | |
| "function", | |
| "builtin_function_or_method", | |
| "instance", | |
| "_Feature" | |
| ], | |
| "window_display": false | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment