mikk-c · September 1, 2020 13:28
diff --git a/exercise_36_2.ipynb b/exercise_36_2.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import networkx as nx\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.metrics import normalized_mutual_info_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the network as a MultiGraph; the third (integer) column of every\n",
    "# edge-list row is stored as the \"layer\" attribute of that edge.\n",
    "G = nx.read_edgelist(\"1/data.txt\", delimiter = \"\\t\", create_using = nx.MultiGraph(), data = [(\"layer\", int),])\n",
    "\n",
    "# Load ground truth: first tab-separated column is the key, second one the value.\n",
    "ground_truth = {}\n",
    "with open(\"1/nodes.txt\", 'r') as f:\n",
    "   for line in f:\n",
    "      line = line.strip()\n",
    "      # Skip blank lines (e.g. a trailing newline) instead of failing with an IndexError\n",
    "      if not line:\n",
    "         continue\n",
    "      fields = line.split('\\t')\n",
    "      ground_truth[fields[0]] = fields[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# asyn_lpa_communities is randomized: fix its seed so that a fresh run reproduces the same communities.\n",
    "LPA_SEED = 42\n",
    "\n",
    "# Collect one (node, \"<layer>-<community>\", 1) record per node for every layer the node has an edge in.\n",
    "df = []\n",
    "for layer in range(1, 5):\n",
    "   # Simple graph holding only the edges whose \"layer\" attribute equals the current layer\n",
    "   Gl = nx.Graph()\n",
    "   Gl.add_edges_from((u, v) for u, v, attrs in G.edges(data = True) if attrs[\"layer\"] == layer)\n",
    "   lp = list(nx.algorithms.community.asyn_lpa_communities(Gl, seed = LPA_SEED))\n",
    "   df.extend([(n, \"%s-%s\" % (layer, c), 1) for c, members in enumerate(lp) for n in members])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per node, one 0/1 column per \"<layer>-<community>\" label (1 = the node belongs to it).\n",
    "# NOTE: df is rebound here from the record list to the pivoted frame, so re-run the previous cell before re-running this one.\n",
    "df = pd.DataFrame(data = df, columns = (\"node\", \"comm\", \"present\"))\n",
    "df = pd.pivot_table(data = df, index = \"node\", columns = \"comm\", values = \"present\").fillna(0).astype(int)\n",
    "\n",
    "# k-means starts from random centroids: pin random_state so the clustering (and the NMI computed from it) is reproducible.\n",
    "kmeans = KMeans(n_clusters = 4, random_state = 42).fit(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NMI: 0.6204\n"
     ]
    }
   ],
   "source": [
    "# kmeans.labels_ follows the row order of df, so pair every node's ground-truth label with its cluster label.\n",
    "ground_truth_array = [ground_truth[node] for node in df.index]\n",
    "lp_array = list(kmeans.labels_)\n",
    "\n",
    "print(\"NMI: %1.4f\" % normalized_mutual_info_score(ground_truth_array, lp_array))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import networkx as nx\n",
	"from sklearn.cluster import KMeans\n",
	"from sklearn.metrics import normalized_mutual_info_score"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the network as a MultiGraph; the third (integer) column of every\n",
	"# edge-list row is stored as the \"layer\" attribute of that edge.\n",
	"G = nx.read_edgelist(\"1/data.txt\", delimiter = \"\\t\", create_using = nx.MultiGraph(), data = [(\"layer\", int),])\n",
	"\n",
	"# Load ground truth: first tab-separated column is the key, second one the value.\n",
	"ground_truth = {}\n",
	"with open(\"1/nodes.txt\", 'r') as f:\n",
	"   for line in f:\n",
	"      line = line.strip()\n",
	"      # Skip blank lines (e.g. a trailing newline) instead of failing with an IndexError\n",
	"      if not line:\n",
	"         continue\n",
	"      fields = line.split('\\t')\n",
	"      ground_truth[fields[0]] = fields[1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"# asyn_lpa_communities is randomized: fix its seed so that a fresh run reproduces the same communities.\n",
	"LPA_SEED = 42\n",
	"\n",
	"# Collect one (node, \"<layer>-<community>\", 1) record per node for every layer the node has an edge in.\n",
	"df = []\n",
	"for layer in range(1, 5):\n",
	"   # Simple graph holding only the edges whose \"layer\" attribute equals the current layer\n",
	"   Gl = nx.Graph()\n",
	"   Gl.add_edges_from((u, v) for u, v, attrs in G.edges(data = True) if attrs[\"layer\"] == layer)\n",
	"   lp = list(nx.algorithms.community.asyn_lpa_communities(Gl, seed = LPA_SEED))\n",
	"   df.extend([(n, \"%s-%s\" % (layer, c), 1) for c, members in enumerate(lp) for n in members])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"# One row per node, one 0/1 column per \"<layer>-<community>\" label (1 = the node belongs to it).\n",
	"# NOTE: df is rebound here from the record list to the pivoted frame, so re-run the previous cell before re-running this one.\n",
	"df = pd.DataFrame(data = df, columns = (\"node\", \"comm\", \"present\"))\n",
	"df = pd.pivot_table(data = df, index = \"node\", columns = \"comm\", values = \"present\").fillna(0).astype(int)\n",
	"\n",
	"# k-means starts from random centroids: pin random_state so the clustering (and the NMI computed from it) is reproducible.\n",
	"kmeans = KMeans(n_clusters = 4, random_state = 42).fit(df)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"NMI: 0.6204\n"
	]
	}
	],
	"source": [
	"# kmeans.labels_ follows the row order of df, so pair every node's ground-truth label with its cluster label.\n",
	"ground_truth_array = [ground_truth[node] for node in df.index]\n",
	"lp_array = list(kmeans.labels_)\n",
	"\n",
	"print(\"NMI: %1.4f\" % normalized_mutual_info_score(ground_truth_array, lp_array))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
No results found