Skip to content

Instantly share code, notes, and snippets.

@rbiswasfc
Created December 14, 2024 00:16
Show Gist options
  • Save rbiswasfc/8d2bfec5c2a358e8beeb2df390111f9d to your computer and use it in GitHub Desktop.
Save rbiswasfc/8d2bfec5c2a358e8beeb2df390111f9d to your computer and use it in GitHub Desktop.
clustering - eedi
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": false
},
"id": "99a62e5d",
"cell_type": "code",
"source": "import os\nimport ast\nimport pandas as pd\nimport numpy as np\nimport kagglehub\nimport random\nfrom copy import deepcopy",
"execution_count": 1,
"outputs": []
},
{
"metadata": {},
"id": "f6c4cb97",
"cell_type": "markdown",
"source": "# Data"
},
{
"metadata": {
"trusted": false
},
"id": "50740fae",
"cell_type": "code",
"source": "data_dir = \"/Users/rajabiswas/.cache/kagglehub/competitions/eedi-mining-misconceptions-in-mathematics\"\n\ndf = pd.read_csv(os.path.join(data_dir, \"train.csv\"))\ncontent_df = pd.read_csv(os.path.join(data_dir, \"misconception_mapping.csv\"))\n\ncontent_df[\"MisconceptionId\"] = content_df[\"MisconceptionId\"].astype(str)\nid2name = dict(zip(content_df[\"MisconceptionId\"], content_df[\"MisconceptionName\"]))",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "c4f89f71",
"cell_type": "code",
"source": "misconception_ids = set()\nfor col in [\"MisconceptionAId\", \"MisconceptionBId\", \"MisconceptionCId\", \"MisconceptionDId\"]:\n misconception_ids.update(df[col].dropna().astype(int).unique())\nmisconception_ids = list(map(str, misconception_ids))\nprint(f\"Number of unique MisconceptionIds: {len(misconception_ids)}\")",
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "Number of unique MisconceptionIds: 1604\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "72e72526",
"cell_type": "code",
"source": "missing_ids = content_df[~content_df[\"MisconceptionId\"].isin(misconception_ids)][\"MisconceptionId\"].unique().tolist()\nlen(missing_ids)",
"execution_count": 4,
"outputs": [
{
"data": {
"text/plain": "983"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "3859a796",
"cell_type": "code",
"source": "all_ids = missing_ids + misconception_ids\nlen(all_ids)",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "2587"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"id": "13a675be",
"cell_type": "markdown",
"source": "# Groups"
},
{
"metadata": {
"trusted": false
},
"id": "d131fe40",
"cell_type": "code",
"source": "ranker_df = pd.read_csv(\"../data/ranker_oof_df.csv\")\nranker_df[\"MisconceptionId\"] = ranker_df[\"MisconceptionId\"].apply(ast.literal_eval)\nranker_df[\"MisconceptionId\"] = ranker_df[\"MisconceptionId\"].apply(lambda x: [str(y) for y in x if str(y) in all_ids])\ngroups = ranker_df[\"MisconceptionId\"].apply(lambda x: x[:8]).values.tolist()",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "e1ab56f0",
"cell_type": "code",
"source": "from collections import defaultdict\nimport numpy as np\nfrom sklearn.cluster import AgglomerativeClustering\n\ndef build_cooccurrence_matrix():\n cooccurrence = defaultdict(lambda: defaultdict(int))\n for group in groups:\n for i in range(len(group)):\n for j in range(i + 1, len(group)):\n cooccurrence[group[i]][group[j]] += 1\n cooccurrence[group[j]][group[i]] += 1\n return cooccurrence\n\n\ndef create_similarity_matrix(all_ids, cooccurrence):\n n = len(all_ids)\n id_to_index = {id: i for i, id in enumerate(all_ids)}\n similarity_matrix = np.zeros((n, n))\n\n for i, id1 in enumerate(all_ids):\n for j, id2 in enumerate(all_ids):\n if i != j:\n similarity_matrix[i, j] = cooccurrence[id1][id2]\n\n return similarity_matrix\n\n\ndef display_clusters_with_names(clusters, num_to_show=5):\n random.shuffle(clusters)\n for i, cluster in enumerate(clusters[:num_to_show], 1):\n print(f\"\\nCluster {i}:\")\n for id in cluster:\n name = id2name.get(id, \"Unknown\")\n status = \"Missing\" if id in missing_ids else \"Present\"\n print(f\"[{id:<5} - {status:<8}] >> {name}\")",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "5e256bc3",
"cell_type": "code",
"source": "def create_diverse_clusters(min_cluster_size=16, max_cluster_size=24, min_missing=3, min_present=3):\n all_ids = list(set(misconception_ids + missing_ids))\n cooccurrence = build_cooccurrence_matrix()\n\n id_occurrences = {id: sum(cooccurrence[id].values()) for id in all_ids}\n sorted_ids = sorted(id_occurrences, key=id_occurrences.get, reverse=True)\n\n clusters = []\n\n for id in sorted_ids:\n cluster = [id]\n related_ids = sorted(cooccurrence[id], key=cooccurrence[id].get, reverse=True)\n\n missing_count = 1 if id in missing_ids else 0\n present_count = 1 if id not in missing_ids else 0\n\n for related_id in related_ids:\n if len(cluster) >= max_cluster_size:\n break\n\n if related_id in missing_ids and missing_count < min_missing:\n cluster.append(related_id)\n missing_count += 1\n elif related_id not in missing_ids and present_count < min_present:\n cluster.append(related_id)\n present_count += 1\n elif len(cluster) < min_cluster_size:\n cluster.append(related_id)\n if related_id in missing_ids:\n missing_count += 1\n else:\n present_count += 1\n\n if len(cluster) >= min_cluster_size and missing_count >= min_missing and present_count >= min_present:\n clusters.append(cluster)\n\n return clusters",
"execution_count": 15,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "319fee67",
"cell_type": "code",
"source": "diverse_clusters = create_diverse_clusters()\nlen(diverse_clusters)",
"execution_count": 16,
"outputs": [
{
"data": {
"text/plain": "2438"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "7d7f6008",
"cell_type": "code",
"source": "display_clusters_with_names(diverse_clusters, num_to_show=1)",
"execution_count": 17,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "\nCluster 1:\n[104 - Missing ] >> Assumes a sequence is linear\n[1582 - Present ] >> Thinks terms in linear sequence are in direct proportion\n[2472 - Present ] >> Does not know how to find the next term in a sequence\n[939 - Missing ] >> Describes position to term rule using term to term rule\n[1198 - Present ] >> Does not recognise that a linear sequence must increase or decrease by same amount\n[108 - Present ] >> Uses only the first two terms of a sequence to work out a term-to-term rule\n[2377 - Present ] >> Does not know the definition of term or term-to-term rule\n[692 - Missing ] >> Only uses two terms of a sequence to work out a term-to-term rule\n[2284 - Present ] >> When asked for a specific term in a sequence just gives the next term\n[1055 - Present ] >> Does not notice all changes in a sequence of visual patterns\n[2253 - Missing ] >> Believes the position number in a sequence does not have to be an integer\n[2407 - Missing ] >> Believes an arithmetic sequence can have a common second difference rather than first difference\n[1866 - Present ] >> Confuses additive and multiplicative relationships\n[2386 - Present ] >> Believes there is a multiplicative relationship between values of terms in a linear sequence e.g. 10th term is double 5th term\n[713 - Present ] >> When asked for a specific term in a sequence gives a different term, which is not the next\n[1720 - Present ] >> Identifies the term-to-term rule rather than the next term\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "390dcc2b",
"cell_type": "code",
"source": "import matplotlib.pyplot as plt\n\ncluster_lengths = [len(cluster) for cluster in diverse_clusters]\nplt.figure(figsize=(10, 6))\nplt.hist(cluster_lengths, bins=range(min(cluster_lengths), max(cluster_lengths) + 2, 1), edgecolor=\"black\")\nplt.title(\"Distribution of Cluster Lengths\")\nplt.xlabel(\"Cluster Length\")\nplt.ylabel(\"Frequency\")\nplt.xticks(range(min(cluster_lengths), max(cluster_lengths) + 1))\nplt.show()\n\nprint(f\"Min cluster length: {min(cluster_lengths)}\")\nprint(f\"Max cluster length: {max(cluster_lengths)}\")\nprint(f\"Mean cluster length: {np.mean(cluster_lengths):.2f}\")\nprint(f\"Median cluster length: {np.median(cluster_lengths)}\")",
"execution_count": 18,
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1sAAAIhCAYAAAC48qAWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCmElEQVR4nO3deVwW9f7//+cVO4SXLAKSSC645JbpySVzyRW33D5apmlp2dEsF44d63TCjh8tTbMyzeMxtVwq/WhZFqm55VEzNSzTTMu1QFwQBBUQ5vdHP65vl6DCJW8v0cf9dpvbzZl5zcxrLibj6cy8L5tlWZYAAAAAACXqNnc3AAAAAAA3I8IWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgC4aN68ebLZbI7J19dXERERatWqlSZOnKiUlJQC28THx8tmsxXrOOfOnVN8fLzWr19frO0KO9add96pzp07F2s/V7No0SJNmzat0HU2m03x8fEleryS9tVXX6lhw4YKCAiQzWbTxx9/fMX648eP6+9//7vq1Kmj22+/Xb6+voqJidGzzz6r/fv3O+pc+VkXx+eff37dP9v8czp58uR1PW5R7dmzR/Hx8Tp06FCBdS1btlTt2rWvf1MAbmme7m4AAEq7uXPnqkaNGsrJyVFKSoo2bdqkV199Va+99po+/PBDtWnTxlE7ePBgdejQoVj7P3funMaNGyfpj18Yi8qVY7li0aJF2r17t0aMGFFg3ZYtW1ShQgXjPbjKsiz17t1b1apV04oVKxQQEKDq1atftn7btm3q3LmzLMvS008/rSZNmsjb21v79u3TggULdO+99yo1NfW69P7555/r7bffvuHD7PW0Z88ejRs3Ti1bttSdd97p7nYAgLAFANeqdu3aatiwoWO+Z8+eGjlypJo1a6YePXpo//79Cg8PlyRVqFDBePg4d+6c/P39r8uxrqZx48ZuPf7V/P777zp9+rS6d++u1q1bX7E2PT1dDz74oHx9fbV582anz7Zly5YaMmSIli5darpl4/KvHwDAteMxQgAwoGLFipoyZYrOnj2rWbNmOZYX9mjZ2rVr1bJlS4WEhMjPz08VK1ZUz549de7cOR06dEjlypWTJI0bN87xyOLAgQOd9rdz50716tVLQUFBqlKlymWPlW/58uWqW7eufH19VblyZb355ptO6/Mfkbz0caz169fLZrM5Hmls2bKlVq5cqcOHDzs9UpmvsMcId+/erQcffFBBQUHy9fXV3Xffrfnz5xd6nMWLF+uFF15QZGSkypQpozZt2mjfvn2X/+D/ZNOmTWrdurUCAwPl7++vpk2bauXKlY718fHxjsD03HPPyWazXfFuyOzZs5WcnKxJkyZdNsT26tXrij1d7rHKO++80/Ezlf4IPHFxcapUqZJ8fX0VHByshg0bavHixZKkgQMH6u2333bsM3/K/3lZlqUZM2bo7rvvlp+fn4KCgtSrVy/9+uuvTsfNf7Ru48aNatq0qfz9/fX4449f8RyKYvv27eratauCg4Pl6+ur+vXr66OPPnKqyb/G1q1bp7/+9a8KDQ1VSEiIevTood9//92pNisrS6NHj1ZERIT8/f3VvHlz7dixw+lzmzdvnv7nf/5HktSqVSvHZzJv3jynfX377be6//775e/vr8qVK+uVV15RXl6eY31eXp7Gjx+v6tWry8/PT2XLllXdunX1xhtvXPPnAuDWw50tADCkY8eO8vDw0MaNGy9bc+jQIXXq1En333+/3n33XZUtW1a//fabEhISlJ2drfLlyyshIUEdOnTQoEGDNHjwYElyBLB8PXr00EMPPaSnnnpKmZmZV+wrMTFRI0aMUHx8vCIiIrRw4UI9++yzys7OVlxcXLHOccaMGXryySf1yy+/aPny5Vet37dvn5o2baqwsDC9+eabCgkJ0YIFCzRw4EAdP35cY8aMcap//vnndd999+k///mP0tPT9dxzz6lLly7au3evPDw8LnucDRs2qG3btqpbt67mzJkjHx8fzZgxQ126dNHixYvVp08fDR48WPXq1VOPHj00fPhw9e3bVz4+Ppfd56pVq+Th4aEuXboU/QNy0ahRo/T+++9r/Pjxql+/vjIzM7V7926dOnVKkvTiiy8qMzNTS5cu1ZYtWxzblS9fXpI0ZMgQzZs3T88884xeffVVnT59Wi+//LKaNm2qXbt2Oe60SlJSUpL69eunMWPGaMKECbrttmv7d9h169apQ4cOatSokd555x3Z7XZ98MEH6tOnj86dO+cUKqU/Hnft1KmTFi1apKNHj+pvf/ub+vXrp7Vr1zpqHnvsMX344YcaM2aMHnjgAe3Zs0fdu3dXenq6o6ZTp06aMGGCnn/+eb399tu65557JMnxjw+SlJycrEceeUSjR4/WSy+9pOXLl2vs2LGKjIzUo48+KkmaNGmS4uPj9Y9//EPNmzdXTk6OfvrpJ505c+aaPhcAtygLAOCSuXPnWpKsb7/99rI14eHhVs2aNR3zL730kvXnv3qXLl1qSbISExMvu48TJ05YkqyXXnqpwLr8/f3zn/+87Lo/i46Otmw2W4HjtW3b1ipTpoyVmZnpdG4HDx50qlu3bp0lyVq3bp1jWadOnazo6OhCe7+074ceesjy8fGxjhw54lQXGxtr+fv7W2fOnHE6TseOHZ3qPvroI0uStWXLlkKPl69x48ZWWFiYdfbsWceyixcvWrVr17YqVKhg5eXlWZZlWQcPHrQkWZMnT77i/izLsmrUqGFFRERctS5fYZ//5X6O0dHR1oABAxzztWvXtrp163bF/Q8bNqzA/i3LsrZs2WJJsqZMmeK0/OjRo5afn581ZswYx7IWLVpYkqyvvvqqCGf0/87pxIkTl62pUaOGVb9+fSsnJ8dpeefOna3y5ctbubm5lmX9v2ts6NChTnWTJk2yJFlJSUmWZVnWjz/+aEmynnvuOae6xYsXW5KcPrclS5YUuD4vPddvvvnGafldd91ltW/f3qnPu++++/IfAgAUA48RAoBBlmVdcf3dd98tb29vPfnkk5o/f36Bx7yKqmfPnkWurVWrlurVq+e0rG/fvkpPT9fOnTtdOn5RrV27Vq1bt1ZUVJTT8oEDB+rcuXNOd2kkqWvXrk7zdevWlSQdPnz4ssfIzMzUN998o169eun22293LPfw8FD//v117NixIj+K6C733nuvvvjiC/3973/X+vXrdf78+SJv+9lnn8lms6lfv366ePGiY4qIiFC9evUKjGoZFBSkBx54oET6PnDggH766Sc98sgjkuR0/I4dOyopKanAZ3+1n/GGDRskSb1793aq69Wrlzw9i/eATkREhO69994Cx/vz9XTvvfdq165dGjp0qL788kunu2cAUFyELQAwJDMzU6dOnVJkZORla6pUqaI1a9YoLCxMw4YNU5UqVVSlSpVivx+S//hYUURERFx2Wf5jaqacOnWq0F7zP6NLjx8SEuI0n/+Y35XCR2pqqizLKtZxiqJixYo6ceLEVR/TLAlvvvmmnnvuOX388cdq1aqVgoOD1a1bN6eh5S/n+PHjsixL4eHh8vLycpq2bt1aYNj24lw7RTm2JMXFxRU49tChQyWpwPGv9jPO/1n9+dFHSfL09Cyw7dUUVu/j4+N0PY0dO1avvfaatm7dqtjYWIWEhKh169bavn17sY4FABLvbAGAMStXrlRubu5Vh2u///77df/99ys3N1fbt2/XW2+9pREjRig8PFwPPfRQkY5VnO9zSk5Ovuyy/F9GfX19Jf0xMMGfXev3K4WEhCgpKanA8vwBEUJDQ69p/9Ifd2puu+22Ej9O+/bttWrVKn366adF/rlcysfHp8BnKhUMfwEBARo3bpzGjRun48ePO+5ydenSRT/99NMVjxEaGiqbzaavv/660HfQLl1Wkt8Flv+5jh07Vj169Ci05kpD6xcm/5o8fvy47rjjDsfyixcvGvnHAU9PT40aNUqjRo3SmTNntGbNGj3//PNq3769jh49ykiNAIqFO1sAYMCRI0cUFxcnu92uIUOGFGkbDw8PNWrUyDHKXP4jfUW5m1McP/74o3bt2uW0bNGiRQoMDHQMKpA/Kt/333/vVLdixYoC+7v0zsCVtG7dWmvXri0w2tx7770nf3//EhkqPiAgQI0aNdKyZcuc+srLy9OCBQtUoUIFVatWrdj7HTRokCIiIjRmzBj99ttvhdYsW7bsivu48847C3yma9euVUZGxmW3CQ8P18CBA/Xwww9r3759OnfunKTLXxf53wP222+/qWHDhgWmOnXqXPVcXVW9enXFxMRo165dhR67YcOGCgwMLNY+mzdvLkn68MMPnZYvXbpUFy9edFpW0v+tlC1bVr169dKwYcN0+vTpQr8sGQCuhDtbAHCNdu/e7XgvJSUlRV9//bXmzp0rDw8PLV++vMDIgX/2zjvvaO3aterUqZMqVqyoCxcu6N1335Ukx5chBwYGKjo6Wp988olat26t4OBghYaGuvylrZGRkeratavi4+NVvnx5LViwQKtXr9arr77q+Ff7v/zlL6pevbri4uJ08eJFBQUFafny5dq0aVOB/dWpU0fLli3TzJkz1aBBA912221O3zv2Zy+99JI+++wztWrVSv/85z8VHByshQsXauXKlZo0aZLsdrtL53SpiRMnqm3btmrVqpXi4uLk7e2tGTNmaPfu3Vq8eLFLd3Psdrs++eQTde7cWfXr13f6UuP9+/drwYIF2rVr12Xv6EhS//799eKLL+qf//ynWrRooT179mj69OkFzrtRo0bq3Lmz6tatq6CgIO3du1fvv/++mjRp4vgZ5YemV199VbGxsfLw8FDdunV133336cknn9Rjjz2m7du3q3nz5goICFBSUpI2bdqkOnXq6K9//Wuxz//PPv3000JDU69evTRr1izFxsaqffv2GjhwoO644w6dPn1ae/fu1c6dO7VkyZJiHatWrVp6+OGHNWXKFHl4eOiBBx7Qjz/+qClTpshutzuNnli7dm1J0r///W8FBgbK19dXlSpVKtbjhl26dHF8d165cuV0+PBhTZs2TdHR0YqJiSlW7wDAaIQA4KL80dTyJ29vbyssLMxq0aKFNWHCBCslJaXANpeOULdlyxare/fuVnR0tOXj42OFhIRYLVq0sFasWOG03Zo1a6z69etbPj4+TiOwXWl0uMuNRtipUydr6dKlVq1atSxvb2/rzjvvtKZOnVpg+59//tlq166dVaZMGatcuXLW8OHDrZUrVxYY7e306dNWr169rLJly1o2m83pmCpk9L0ffvjB6tKli2W32y1vb2+rXr161ty5c51q8kcjXLJkidPy/NEDL60vzNdff2098MADVkBAgOXn52c1btzY+vTTTwvdX1FGI8yXnJxsPffcc1atWrUsf39/y8fHx6patao1ZMgQ64cffnDUFfb5Z2VlWWPGjLGioqIsPz8/q0WLFlZiYmKB0Qj//ve/Ww0bNrSCgoIsHx8fq3LlytbIkSOtkydPOu1r8ODBVrly5Ryf+59Hj3z33XetRo0aOc6/SpUq1qOPPmpt377dUdOiRQurVq1aRT73/HO63JRv165dVu/eva2wsDDLy8vLioiIsB544AHrnXfecdRcbjTPwka8vHDhgjVq1CgrLCzM8vX1tRo3bmxt2bLFstvt1siRI522nzZtmlWpUiXLw8PD6Vq53LkOGDDAaTTNKVOmWE2bNrVCQ0Mtb29vq2LFitagQYOsQ4cOFflzAoB8Nsu6ylBZAAAAN5jNmzfrvvvu08KFC9W3b193twMAhSJsAQCAG9rq1au1ZcsWNWjQQH5+ftq1a5deeeUV2e12ff/9944BXQDgRsM7WwAA4IZWpkwZrVq1StOmTdPZs2cVGhqq2NhYTZw4kaAF4IbGnS0AAAAAMICh3wEAAADAAMIWAAAAABhA2AIAAAAAAxggo4jy8vL0+++/KzAw0KUvwwQAAABwc7AsS2fPnlVkZKTTl6tfirBVRL///ruioqLc3QYAAACAG8TRo0dVoUKFy64nbBVRYGCgpD8+0DJlyri5GwAAAADukp6erqioKEdGuBzCVhHlPzpYpkwZwhYAAACAq75exAAZAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAM8HR3A3DNkSNHdPLkSXe3AbgkNDRUFStWdHcbAAAARhG2SqEjR46oeo2aunD+nLtbAVzi6+evfT/tJXABAICbGmGrFDp58qQunD+nkM6j5RUS5e52gGLJOXVUpz6bopMnTxK2AADATY2wVYp5hUTJJ6Kqu9sAAAAAUAgGyAAAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAY4NawNXHiRP3lL39RYGCgwsLC1K1bN+3bt8+pxrIsxcfHKzIyUn5+fmrZsqV+/PFHp5qsrCwNHz5coaGhCggIUNeuXXXs2DGnmtTUVPXv3192u112u139+/fXmTNnTJ8iAAAAgFuUW8PWhg0bNGzYMG3dulWrV6/WxYsX1a5dO2VmZjpqJk2apKlTp2r69On69ttvFRERobZt2+rs2bOOmhEjRmj58uX64IMPtGnTJmVkZKhz587Kzc111PTt21eJiYlKSEhQQkKCEhMT1b9//+t6vgAAAABuHZ7uPHhCQoLT/Ny5cxUWFqYdO3aoefPmsixL06ZN0wsvvKAePXpIkubPn6/w8HAtWrRIQ4YMUVpamubMmaP3339fbdq0kSQtWLBAUVFRWrNmjdq3b6+9e/cqISFBW7duVaNGjSRJs2fPVpMmTbRv3z5Vr179+p44AAAAgJveDfXOVlpamiQpODhYknTw4EElJyerXbt2jhofHx+1aNFCmzdvliTt2LFDOTk5TjWRkZGqXbu2o2bLli2y2+2OoCVJjRs3lt1ud9RcKisrS+np6U4TAAAAABTVDRO2LMvSqFGj1KxZM9WuXVuSlJycLEkKDw93qg0PD3esS05Olre3t4KCgq5YExYWVuCYYWFhjppLTZw40fF+l91uV1RU1LWdIAAAAIBbyg0Ttp5++ml9//33Wrx4cYF1NpvNad6yrALLLnVpTWH1V9rP2LFjlZaW5piOHj1alNMAAAAAAEk3SNgaPny4VqxYoXXr1qlChQqO5REREZJU4O5TSkqK425XRESEsrOzlZqaesWa48ePFzjuiRMnCtw1y+fj46MyZco4TQAAAABQVG4NW5Zl6emnn9ayZcu0du1aVapUyWl9pUqVFBERodWrVzuWZWdna8OGDWratKkkqUGDBvLy8nKqSUpK0u7dux01TZo0UVpamrZt2+ao+eabb5SWluaoAQAAAICS5NbRCIcNG6ZFixbpk08+UWBgoOMOlt1ul5+fn2w2m0aMGKEJEyYoJiZGMTExmjBhgvz9/dW3b19H7aBBgzR69GiFhIQoODhYcXFxqlOnjmN0wpo1a6pDhw564oknNGvWLEnSk08+qc6dOzMSIQAAAAAj3Bq2Zs6cKUlq2bKl0/K5c+dq4MCBkqQxY8bo/PnzGjp0qFJTU9WoUSOtWrVKgYGBjvrXX39dnp6e6t27t86fP6/WrVtr3rx58vDwcNQsXLhQzzzzjGPUwq5du2r69OlmTxAAAADALctmWZbl7iZKg/T0dNntdqWlpbn9/a2dO3eqQYMGihgwTT4RVd3aC1BcWckHlDx/hHbs2KF77rnH3e0AAAAUW1GzwQ0xQAYAAAAA3GwIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAA9watjZu3KguXbooMjJSNptNH3/8sdP6gQMHymazOU2NGzd2qsnKytLw4cMVGhqqgIAAde3aVceOHXOqSU1NVf/+/WW322W329W/f3+dOXPG8NkBAAAAuJW5NWxlZmaqXr16mj59+mVrOnTooKSkJMf0+eefO60fMWKEli9frg8++ECbNm1SRkaGOnfurNzcXEdN3759lZiYqISEBCUkJCgxMVH9+/c3dl4AAAAA4OnOg8fGxio2NvaKNT4+PoqIiCh0XVpamubMmaP3339fbdq0kSQtWLBAUVFRWrNmjdq3b6+9e/cqISFBW7duVaNGjSRJs2fPVpMmTbRv3z5Vr169ZE8KAAAAAFQK3tlav369wsLCVK1aNT3xxBNKSUlxrNuxY4dycnLUrl07x7LIyEjVrl1bmzdvliRt2bJFdrvdEbQkqXHjxrLb7Y6awmRlZSk9Pd1pAgAAAICiuqHDVmxsrBYuXKi1a9dqypQp+vbbb/XAAw8oKytLkpScnCxvb28FBQU5bRceHq7k5GRHTVhYWIF9h4WFOWoKM3HiRMc7Xna7XVFRUSV4ZgAAAABudm59jPBq+vTp4/hz7dq11bBhQ0VHR2vlypXq0aPHZbezLEs2m80x/+c/X67mUmPHjtWoUaMc8+np6QQuAAAAAEV2Q9/ZulT58uUVHR2t/fv3S5IiIiKUnZ2t1NRUp7qUlBSFh4c7ao4fP15gXydOnHDUFMbHx0dlypRxmgAAAACgqEpV2Dp16pSOHj2q8uXLS5IaNGggLy8vrV692lGTlJSk3bt3q2nTppKkJk2aKC0tTdu2bXPUfPPNN0pLS3PUAAAAAEBJc+tjhBkZGTpw4IBj/uDBg0pMTFRwcLCCg4MVHx+vnj17qnz58jp06JCef/55hYaGqnv37pIku92uQYMGafTo0QoJCVFwcLDi4uJUp04dx+iENWvWVIcOHfTEE09o1qxZkqQnn3xSnTt3ZiRCAAAAAMa4NWxt375drVq1csznvyM1YMAAzZw5Uz/88IPee+89nTlzRuXLl1erVq304YcfKjAw0LHN66+/Lk9PT/Xu3Vvnz59X69atNW/ePHl4eDhqFi5cqGeeecYxamHXrl2v+N1eAAAAAHCt3Bq2WrZsKcuyLrv+yy+/vOo+fH199dZbb+mtt966bE1wcLAWLFjgUo8AAAAA4IpS9c4WAAAAAJQWhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAANcClsHDx4s6T4AAAAA4KbiUtiqWrWqWrVqpQULFujChQsl3RMAAAAAlHouha1du3apfv36Gj16tCIiIjRkyBBt27atpHsDAAAAgFLLpbBVu3ZtTZ06Vb/99pvmzp2r5ORkNWvWTLVq1dLUqVN14sSJku4TAAAAAEqVaxogw9PTU927d9dHH32kV199Vb/88ovi4uJUoUIFPfroo0pKSiqpPgEAAACgVLmmsLV9+3YNHTpU5cuX19SpUxUXF6dffvlFa9eu1W+//aYHH3ywpPoEAAAAgFLF05WNpk6dqrlz52rfvn3q2LGj3nvvPXXs2FG33fZHdqtUqZJmzZqlGjVqlGizAAAAAFBauBS2Zs6cqccff1yPPfaYIiIiCq2pWLGi5syZc03NAQAAAEBp5VLY2r9//1VrvL29NWDAAFd2DwAAAAClnkvvbM2dO1dLliwpsHzJkiWaP3/+NTcFAAAAAKWdS2HrlVdeUWhoaIHlYWFhmjBhwjU3BQAAAAClnUth6/Dhw6pUqVKB5dHR0Tpy5Mg1NwUAAAAApZ1LYSssLEzff/99geW7du1SSEjINTcFAAAAAKWdS2HroYce0jPPPKN169YpNzdXubm5Wrt2rZ599lk99NBDJd0jAAAAAJQ6Lo1GOH78eB0+fFitW7eWp+cfu8jLy9Ojjz7KO1sAAAAAIBfDlre3tz788EP961//0q5du+Tn56c6deooOjq6pPsDAAAAgFLJpbCVr1q1aqpWrVpJ9QIAAAAANw2XwlZubq7mzZunr776SikpKcrLy3Nav3bt2hJpDgAAAABKK5fC1rPPPqt58+apU6dOql27tmw2W0n3BQAAAAClmkth64MPPtBHH32kjh07lnQ/AAAAAHBTcGnod29vb1WtWrWkewEAAACAm4ZLYWv06NF64403ZFlWSfcDAAAAADcFlx4j3LRpk9atW6cvvvhCtWrVkpeXl9P6ZcuWlUhzAAAAAFBauRS2ypYtq+7du5d0LwAAAABw03ApbM2dO7ek+wAAAACAm4pL72xJ0sWLF7VmzRrNmjVLZ8+elST9/vvvysjIKLHmAAAAAKC0cunO1uHDh9WhQwcdOXJEWVlZatu2rQIDAzVp0iRduHBB77zzTkn3CQAAAAClikt3tp599lk1bNhQqamp8vPzcyzv3r27vvrqqxJrDgAAAABKK5dHI/zvf/8rb29vp+XR0dH67bffSqQxAAAAACjNXLqzlZeXp9zc3ALLjx07psDAwGtuCgAAAABKO5fCVtu2bTVt2jTHvM1mU0ZGhl566SV17NixpHoDAAAAgFLLpccIX3/9dbVq1Up33XWXLly4oL59+2r//v0KDQ3V4sWLS7pHAAAAACh1XApbkZGRSkxM1OLFi7Vz507l5eVp0KBBeuSRR5wGzAAAAACAW5VLYUuS/Pz89Pjjj+vxxx8vyX4AAAAA4KbgUth67733rrj+0UcfdakZAAAAALhZuBS2nn32Waf5nJwcnTt3Tt7e3vL39ydsAQAAALjluTQaYWpqqtOUkZGhffv2qVmzZgyQAQAAAAByMWwVJiYmRq+88kqBu14AAAAAcCsqsbAlSR4eHvr9999LcpcAAAAAUCq59M7WihUrnOYty1JSUpKmT5+u++67r0QaAwAAAIDSzKWw1a1bN6d5m82mcuXK6YEHHtCUKVNKoi8AAAAAKNVcClt5eXkl3QcAAAAA3FRK9J0tAAAAAMAfXLqzNWrUqCLXTp061ZVDAAAAAECp5lLY+u6777Rz505dvHhR1atXlyT9/PPP8vDw0D333OOos9lsJdMlAAAAAJQyLoWtLl26KDAwUPPnz1dQUJCkP77o+LHHHtP999+v0aNHl2iTAAAAAFDauPTO1pQpUzRx4kRH0JKkoKAgjR8/ntEIAQAAAEAuhq309HQdP368wPKUlBSdPXv2mpsCAAAAgNLOpbDVvXt3PfbYY1q6dKmOHTumY8eOaenSpRo0aJB69OhR0j0CAAAAQKnj0jtb77zzjuLi4tSvXz/l5OT8sSNPTw0aNEiTJ08u0QYBAAAAoDRyKWz5+/trxowZmjx5sn755RdZlqWqVasqICCgpPsDAAAAgFLpmr7UOCkpSUlJSapWrZoCAgJkWVZJ9QUAAAAApZpLYevUqVNq3bq1qlWrpo4dOyopKUmSNHjwYIZ9BwAAAAC5GLZGjhwpLy8vHTlyRP7+/o7lffr0UUJCQok1BwAAAACllUvvbK1atUpffvmlKlSo4LQ8JiZGhw8fLpHGAAAAAKA0c+nOVmZmptMdrXwnT56Uj4/PNTcFAAAAAKWdS2GrefPmeu+99xzzNptNeXl5mjx5slq1alVizQEAAABAaeXSY4STJ09Wy5YttX37dmVnZ2vMmDH68ccfdfr0af33v/8t6R4BAAAAoNRx6c7WXXfdpe+//1733nuv2rZtq8zMTPXo0UPfffedqlSpUtI9AgAAAECpU+ywlZOTo1atWik9PV3jxo3TZ599ps8//1zjx49X+fLli7WvjRs3qkuXLoqMjJTNZtPHH3/stN6yLMXHxysyMlJ+fn5q2bKlfvzxR6earKwsDR8+XKGhoQoICFDXrl117Ngxp5rU1FT1799fdrtddrtd/fv315kzZ4p76gAAAABQZMUOW15eXtq9e7dsNts1HzwzM1P16tXT9OnTC10/adIkTZ06VdOnT9e3336riIgItW3bVmfPnnXUjBgxQsuXL9cHH3ygTZs2KSMjQ507d1Zubq6jpm/fvkpMTFRCQoISEhKUmJio/v37X3P/AAAAAHA5Lr2z9eijj2rOnDl65ZVXrungsbGxio2NLXSdZVmaNm2aXnjhBfXo0UOSNH/+fIWHh2vRokUaMmSI0tLSNGfOHL3//vtq06aNJGnBggWKiorSmjVr1L59e+3du1cJCQnaunWrGjVqJEmaPXu2mjRpon379ql69erXdA4AAAAAUBiXwlZ2drb+85//aPXq1WrYsKECAgKc1k+dOvWaGzt48KCSk5PVrl07xzIfHx+1aNFCmzdv1pAhQ7Rjxw7l5OQ41URGRqp27dravHmz2rdvry1btshutzuCliQ1btxYdrtdmzdvvmzYysrKUlZWlmM+PT39ms8JAAAAwK2jWGHr119/1Z133qndu3frnnvukST9/PPPTjUl8XihJCUnJ0uSwsPDnZaHh4c7vjg5OTlZ3t7eCgoKKlCTv31ycrLCwsIK7D8sLMxRU5iJEydq3Lhx13QOAAAAAG5dxQpbMTExSkpK0rp16yRJffr00ZtvvlkgEJWkS8ObZVlXDXSX1hRWf7X9jB07VqNGjXLMp6enKyoqqqhtAwAAALjFFWuADMuynOa/+OILZWZmlmhD+SIiIiSpwN2nlJQUR7iLiIhQdna2UlNTr1hz/PjxAvs/ceLEFUOij4+PypQp4zQBAAAAQFG59D1b+S4NXyWpUqVKioiI0OrVqx3LsrOztWHDBjVt2lSS1KBBA3l5eTnVJCUlaffu3Y6aJk2aKC0tTdu2bXPUfPPNN0pLS3PUAAAAAEBJK9ZjhDabrcCjd9fyjlZGRoYOHDjgmD948KASExMVHBysihUrasSIEZowYYJiYmIUExOjCRMmyN/fX3379pUk2e12DRo0SKNHj1ZISIiCg4MVFxenOnXqOEYnrFmzpjp06KAnnnhCs2bNkiQ9+eST6ty5MyMRAgAAADCmWGHLsiwNHDhQPj4+kqQLFy7oqaeeKjAa4bJly4q0v+3bt6tVq1aO+fx3pAYMGKB58+ZpzJgxOn/+vIYOHarU1FQ1atRIq1atUmBgoGOb119/XZ6enurdu7fOnz+v1q1ba968efLw8HDULFy4UM8884xj1MKuXbte9ru9AAAAAKAk2KxiPAv42GOPFalu7ty5Ljd0o0pPT5fdbldaWprb39/auXOnGjRooIgB0+QTUdWtvQDFlZV8QMnzR2jHjh2OUU0BAABKk6Jmg2Ld2boZQxQAAAAAmHBNA2QAAAAAAApH2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGHBDh634+HjZbDanKSIiwrHesizFx8crMjJSfn5+atmypX788UenfWRlZWn48OEKDQ1VQECAunbtqmPHjl3vUwEAAABwi7mhw5Yk1apVS0lJSY7phx9+cKybNGmSpk6dqunTp+vbb79VRESE2rZtq7NnzzpqRowYoeXLl+uDDz7Qpk2blJGRoc6dOys3N9cdpwMAAADgFuHp7gauxtPT0+luVj7LsjRt2jS98MIL6tGjhyRp/vz5Cg8P16JFizRkyBClpaVpzpw5ev/999WmTRtJ0oIFCxQVFaU1a9aoffv2lz1uVlaWsrKyHPPp6eklfGYAAAAAbmY3/J2t/fv3KzIyUpUqVdJDDz2kX3/9VZJ08OBBJScnq127do5aHx8ftWjRQps3b5Yk7dixQzk5OU41kZGRql27tqPmciZOnCi73e6YoqKiDJwdAAAAgJvVDR22GjVqpPfee09ffvmlZs+ereTkZDVt2lSnTp1ScnKyJCk8PNxpm/DwcMe65ORkeXt7Kygo6LI1lzN27FilpaU5pqNHj5bgmQEAAAC42d3QjxHGxsY6/lynTh01adJEVapU0fz589W4cWNJks1mc9rGsqwCyy5VlBofHx/5+Pi42DkAAACAW90NfWfrUgEBAapTp47279/veI/r0jtUKSkpjrtdERERys7OVmpq6mVrAAAAAMCEUhW2srKytHfvXpUvX16VKlVSRESEVq9e7VifnZ2tDRs2qGnTppKkBg0ayMvLy6kmKSlJu3fvdtQAAAAAgAk39GOEcXFx6tKliypWrKiUlBSNHz9e6enpGjBggGw2m0aMGKEJEyYoJiZGMTExmjBhgvz9/dW3b19Jkt1u16BBgzR69GiFhIQoODhYcXFxqlOnjmN0QgAAAAAw4YYOW8eOHdPDDz+skydPqly5cmrcuLG2bt2q6OhoSdKYMWN0/vx5DR06VKmpqWrUqJFWrVqlwMBAxz5ef/11eXp6qnfv3jp//rxat26tefPmycPDw12nBQAAAOAWYLMsy3J3E6VBenq67Ha70tLSVKZMGbf2snPnTjVo0EARA6bJJ6KqW3sBiisr+YCS54/Qjh07dM8997i7HQAAgGIrajYoVe9sAQAAAEBpQdgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGELYAAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABhC2AAAAAMAAwhYAAAAAGEDYAgAAAAADCFsAAAAAYABhCwAAAAAM8HR3AwBuTXv37nV3C4BLQkNDVbFiRXe3AQAoBQhbAK6r3IxUyWZTv3793N0K4BJfP3/t+2kvgQsAcFWELQDXVV5WhmRZCuk8Wl4hUe5uByiWnFNHdeqzKTp58iRhCwBwVYQtAG7hFRIln4iq7m4DAADAGAbIAAAAAAADCFsAAAAAYABhCwAAAAAMIGwBAAAAgAGELQAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQAAAAAGeLq7AQAASpu9e/e6uwXAJaGhoapYsaK72wBuGYQtAACKKDcjVbLZ1K9fP3e3ArjE189f+37aS+ACrhPCFgAARZSXlSFZlkI6j5ZXSJS72wGKJefUUZ36bIpOnjxJ2AKuE8IWAADF5BUSJZ+Iqu5uAwBwg2OADAAAAAAwgLAFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAQ78DAADcQvbu3evuFgCXhYaGlqrvibulwtaMGTM0efJkJSUlqVatWpo2bZruv/9+d7cFAABgXG5GqmSzqV+/fu5uBXCZr5+/9v20t9QErlsmbH344YcaMWKEZsyYofvuu0+zZs1SbGys9uzZU2p+WAAAAK7Ky8qQLEshnUfLKyTK3e0AxZZz6qhOfTZFJ0+eLDW/v98yYWvq1KkaNGiQBg8eLEmaNm2avvzyS82cOVMTJ050c3cAAADXh1dIlHwiqrq7DeCWcEuErezsbO3YsUN///vfnZa3a9dOmzdvLnSbrKwsZWVlOebT0tIkSenp6eYaLaKMjAxJUlbyAeVlX3BzN0Dx5Jw6KonrF6UT1y9KM65flHY5p49J+uN3YXf/Tp5/fMuyrlh3S4StkydPKjc3V+Hh4U7Lw8PDlZycXOg2EydO1Lhx4wosj4q6cW67p3453d0tAC7j+kVpxvWL0ozrF6VdixYt3N2Cw9mzZ2W32y+7/pYIW/lsNpvTvGVZBZblGzt2rEaNGuWYz8vL0+nTpxUSEnLZba6X9PR0RUVF6ejRoypTpoxbewGKi+sXpRnXL0ozrl+g5FiWpbNnzyoyMvKKdbdE2AoNDZWHh0eBu1gpKSkF7nbl8/HxkY+Pj9OysmXLmmrRJWXKlOEvS5RaXL8ozbh+UZpx/QIl40p3tPLdEl9q7O3trQYNGmj16tVOy1evXq2mTZu6qSsAAAAAN7Nb4s6WJI0aNUr9+/dXw4YN1aRJE/373//WkSNH9NRTT7m7NQAAAAA3oVsmbPXp00enTp3Syy+/rKSkJNWuXVuff/65oqOj3d1asfn4+Oill14q8JgjUBpw/aI04/pFacb1C1x/Nutq4xUCAAAAAIrtlnhnCwAAAACuN8IWAAAAABhA2AIAAAAAAwhbAAAAAGAAYesGtnHjRnXp0kWRkZGy2Wz6+OOPC9Ts3btXXbt2ld1uV2BgoBo3bqwjR45c/2aBS1zt+rXZbIVOkydPdk/DwJ9c7frNyMjQ008/rQoVKsjPz081a9bUzJkz3dMscImrXb/Hjx/XwIEDFRkZKX9/f3Xo0EH79+93T7PATY6wdQPLzMxUvXr1NH369ELX//LLL2rWrJlq1Kih9evXa9euXXrxxRfl6+t7nTsFCrra9ZuUlOQ0vfvuu7LZbOrZs+d17hQo6GrX78iRI5WQkKAFCxZo7969GjlypIYPH65PPvnkOncKFHSl69eyLHXr1k2//vqrPvnkE3333XeKjo5WmzZtlJmZ6YZugZsbQ7+XEjabTcuXL1e3bt0cyx566CF5eXnp/fffd19jQBEUdv1eqlu3bjp79qy++uqr69cYUASFXb+1a9dWnz599OKLLzqWNWjQQB07dtS//vUvN3QJFO7S6/fnn39W9erVtXv3btWqVUuSlJubq7CwML366qsaPHiwG7sFbj7c2Sql8vLytHLlSlWrVk3t27dXWFiYGjVqVOijhsCN7vjx41q5cqUGDRrk7laAImnWrJlWrFih3377TZZlad26dfr555/Vvn17d7cGXFFWVpYkOT0F4+HhIW9vb23atMldbQE3LcJWKZWSkqKMjAy98sor6tChg1atWqXu3burR48e2rBhg7vbA4pl/vz5CgwMVI8ePdzdClAkb775pu666y5VqFBB3t7e6tChg2bMmKFmzZq5uzXgimrUqKHo6GiNHTtWqampys7O1iuvvKLk5GQlJSW5uz3gpuPp7gbgmry8PEnSgw8+qJEjR0qS7r77bm3evFnvvPOOWrRo4c72gGJ599139cgjj/C+IUqNN998U1u3btWKFSsUHR2tjRs3aujQoSpfvrzatGnj7vaAy/Ly8tL//d//adCgQQoODpaHh4fatGmj2NhYd7cG3JQIW6VUaGioPD09dddddzktr1mzJo8BoFT5+uuvtW/fPn344YfubgUokvPnz+v555/X8uXL1alTJ0lS3bp1lZiYqNdee42whRtegwYNlJiYqLS0NGVnZ6tcuXJq1KiRGjZs6O7WgJsOjxGWUt7e3vrLX/6iffv2OS3/+eefFR0d7aaugOKbM2eOGjRooHr16rm7FaBIcnJylJOTo9tuc/5fqIeHh+OpA6A0sNvtKleunPbv36/t27frwQcfdHdLwE2HO1s3sIyMDB04cMAxf/DgQSUmJio4OFgVK1bU3/72N/Xp00fNmzdXq1atlJCQoE8//VTr1693X9PA/+9q168kpaena8mSJZoyZYq72gQKdbXrt0WLFvrb3/4mPz8/RUdHa8OGDXrvvfc0depUN3YN/OFq1++SJUtUrlw5VaxYUT/88IOeffZZdevWTe3atXNj18BNysINa926dZakAtOAAQMcNXPmzLGqVq1q+fr6WvXq1bM+/vhj9zUM/ElRrt9Zs2ZZfn5+1pkzZ9zXKFCIq12/SUlJ1sCBA63IyEjL19fXql69ujVlyhQrLy/PvY0D1tWv3zfeeMOqUKGC5eXlZVWsWNH6xz/+YWVlZbm3aeAmxfdsAQAAAIABvLMFAAAAAAYQtgAAAADAAMIWAAAAABhA2AIAAAAAAwhbAAAAAGAAYQsAAAAADCBsAQAAAIABhC0AAAAAMICwBQC44dhsNn388cfubuOmwGcJAO5D2AIAXFfJyckaPny4KleuLB8fH0VFRalLly766quvjBxv/fr1stlsOnPmjJH9SzdGoImPj9fdd9/t1h4AAM483d0AAODWcejQId13330qW7asJk2apLp16yonJ0dffvmlhg0bpp9++sndLV6WZVnKzc2Vpyf/6wQAFA13tgAA183QoUNls9m0bds29erVS9WqVVOtWrU0atQobd26tdBtCrszlZiYKJvNpkOHDkmSDh8+rC5duigoKEgBAQGqVauWPv/8cx06dEitWrWSJAUFBclms2ngwIGS/ghPkyZNUuXKleXn56d69epp6dKlBY775ZdfqmHDhvLx8dHXX3/t0nnPnTtXNWvWlK+vr2rUqKEZM2Y41h06dEg2m03Lli1Tq1at5O/vr3r16mnLli1O+5g9e7aioqLk7++v7t27a+rUqSpbtqwkad68eRo3bpx27dolm80mm82mefPmObY9efKkunfvLn9/f8XExGjFihUunQcAoHj45zkAwHVx+vRpJSQk6H//938VEBBQYH1+cHDFsGHDlJ2drY0bNyogIEB79uzR7bffrqioKP3f//2fevbsqX379qlMmTLy8/OTJP3jH//QsmXLNHPmTMXExGjjxo3q16+fypUrpxYtWjj2PWbMGL322muqXLmySz3Onj1bL730kqZPn6769evru+++0xNPPKGAgAANGDDAUffCCy/otddeU0xMjF544QU9/PDDOnDggDw9PfXf//5XTz31lF599VV17dpVa9as0YsvvujYtk+fPtq9e7cSEhK0Zs0aSZLdbnesHzdunCZNmqTJkyfrrbfe0iOPPKLDhw8rODi42OcDACg6whYA4Lo4cOCALMtSjRo1SnzfR44cUc+ePVWnTh1JUuXKlR3r8gNFWFiYIyxlZmZq6tSpWrt2rZo0aeLYZtOmTZo1a5ZT2Hr55ZfVtm1bl3v717/+pSlTpqhHjx6SpEqVKmnPnj2aNWuWU9iKi4tTp06dJP0RjmrVqqUDBw6oRo0aeuuttxQbG6u4uDhJUrVq1bR582Z99tlnkiQ/Pz/dfvvt8vT0VERERIEeBg4cqIcffliSNGHCBL311lvatm2bOnTo4PJ5AQCujrAFALguLMuS9MdgEiXtmWee0V//+letWrVKbdq0Uc+ePVW3bt3L1u/Zs0cXLlwoEKKys7NVv359p2UNGzZ0ua8TJ07o6NGjGjRokJ544gnH8osXLzrdeZLk1G/58uUlSSkpKapRo4b27dun7t27O9Xfe++9jrB1NX/ed0BAgAIDA5WSklLs8wEAFA9hCwBwXcTExMhms2nv3r3q1q1bkbe77bY/Xi/OD2uSlJOT41QzePBgtW/fXitXrtSqVas0ceJETZkyRcOHDy90n3l5eZKklStX6o477nBa5+Pj4zRf2COPRZV/nNmzZ6tRo0ZO6zw8PJzmvby8HH/OD6T521uWVSCk/vnzuJo/7zt///n7BgCYwwAZAIDrIjg4WO3bt9fbb7+tzMzMAusvNzR7uXLlJElJSUmOZYmJiQXqoqKi9NRTT2nZsmUaPXq0Zs+eLUny9vaWJOXm5jpq77rrLvn4+OjIkSOqWrWq0xQVFeXqKRYQHh6uO+64Q7/++muB41SqVKnI+6lRo4a2bdvmtGz79u1O897e3k7nCABwP+5sAQCumxkzZqhp06a699579fLLL6tu3bq6ePGiVq9erZkzZ2rv3r0FtskPQPHx8Ro/frz279+vKVOmONWMGDFCsbGxqlatmlJTU7V27VrVrFlTkhQdHS2bzabPPvtMHTt2lJ+fnwIDAxUXF6eRI0cqLy9PzZo1U3p6ujZv3qzbb7/d6V2qojp48GCBEFi1alXFx8frmWeeUZkyZRQbG6usrCxt375dqampGjVqVJH2PXz4cDVv3lxTp05Vly5dtHbtWn3xxRdOd7vuvPNORw8VKlRQYGBggbt0AIDrzAIA4Dr6/fffrWHDhlnR0dGWt7e3dccdd1hdu3a11q1b56iRZC1fvtwxv2nTJqtOnTqWr6+vdf/991tLliyxJFkHDx60LMuynn76aatKlSqWj4+PVa5cOat///7WyZMnHdu//PLLVkREhGWz2awBAwZYlmVZeXl51htvvGFVr17d8vLyssqVK2e1b9/e2rBhg2VZlrVu3TpLkpWamnrVc5JU6JR/TgsXLrTuvvtuy9vb2woKCrKaN29uLVu2zLIsyzp48KAlyfruu+8c+0tNTXXa3rIs69///rd1xx13WH5+fla3bt2s8ePHWxEREY71Fy5csHr27GmVLVvWkmTNnTu30M/SsizLbrc71gMAzLFZVjEe+gYAADeEJ554Qj/99JPL3/0FADCPxwgBACgFXnvtNbVt21YBAQH64osvNH/+fKcvRwYA3Hi4swUAQCnQu3dvrV+/XmfPnlXlypU1fPhwPfXUU+5uCwBwBYQtAAAAADCAod8BAAAAwADCFgAAAAAYQNgCAAAAAAMIWwAAAABgAGELAAAAAAwgbAEAAACAAYQtAAAAADCAsAUAAAAABvx/SsMO1LON+TgAAAAASUVORK5CYII=",
"text/plain": "<Figure size 1000x600 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "Min cluster length: 16\nMax cluster length: 19\nMean cluster length: 16.14\nMedian cluster length: 16.0\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "9e810962",
"cell_type": "code",
"source": "total_ids = sum(len(cluster) for cluster in diverse_clusters)\nmissing_count = sum(1 for cluster in diverse_clusters for id in cluster if id in missing_ids)\npresent_count = total_ids - missing_count\n\nprint(f\"\\nTotal IDs in clusters: {total_ids}\")\nprint(f\"Missing IDs: {missing_count} ({missing_count/total_ids:.2%})\")\nprint(f\"Present IDs: {present_count} ({present_count/total_ids:.2%})\")\n\n# calculate coverage\nall_ids = set(misconception_ids + missing_ids)\ncovered_ids = set(id for cluster in diverse_clusters for id in cluster)\ncoverage = len(covered_ids) / len(all_ids)\nprint(f\"Coverage: {coverage:.2%} of all IDs\")",
"execution_count": 19,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "\nTotal IDs in clusters: 39358\nMissing IDs: 14772 (37.53%)\nPresent IDs: 24586 (62.47%)\nCoverage: 98.42% of all IDs\n"
}
]
},
{
"metadata": {
"trusted": false
},
"id": "ca38ef54",
"cell_type": "code",
"source": "import json\n\nwith open(\"../data/scratch/diverse_clusters.json\", \"w\") as f:\n json.dump(diverse_clusters, f)",
"execution_count": 22,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "b555c57a",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "5965a4af",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"id": "93564ef2",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"gist": {
"id": "",
"data": {
"description": "clustering - eedi",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.10.15",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment