Created
December 16, 2013 04:50
-
-
Save dz1984/7982488 to your computer and use it in GitHub Desktop.
I practiced writing a Naive Bayes algorithm to classify a dataset. The Python script is my reference; the PHP script is written by myself. Just for fun. XD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| A,0.67,0.54,0.37,0.35,0.76,0.76,0.74,0.12,0.28,0.14,0.17,0.24,0.24,0.03,0.16,0.09,0.15,0.10,0.00,0.02,0.02,0.10,0.27,0.12,0.12,0.01,0.88,0.13,0.12,0.10,1.00,0.25,0.16,0.25,0.24,0.05,0.16,0.16,0.83,0.37,0.22,0.29,0.96,0.79,0.19,0.22,0.23,0.96,0.86,1.00,0.97,1.00,1.00,1.00,1.00,0.63,0.42,0.38,0.35,0.51,0.83,1.00,0.95,1.00,1.00,0.74,1.00,0.83,1.00,0.20,0.27,0.27,0.12,0.27,0.14,0.17,0.03,0.87,1.00,0.91,1.00,0.34,0.31,1.00,0.44,0.27,0.42,0.22,0.72,0.57,0.28,0.57,0.37,0.10,0.46,0.37,0.10,0.46,0.64,0.31,0.22,0.37,0.52,0.36 | |
| A,0.88,0.21,0.14,0.13,0.14,0.14,0.98,0.28,0.58,0.31,0.41,0.86,0.86,0.25,0.76,0.81,0.77,0.67,0.43,0.57,0.57,0.22,0.57,0.26,0.45,0.39,0.24,0.78,0.77,0.66,0.56,0.28,0.41,0.34,0.35,0.05,0.15,0.15,0.86,0.25,0.16,0.22,0.97,0.73,0.41,0.34,0.35,0.95,0.22,0.17,0.22,0.13,0.01,0.04,0.04,0.93,0.07,0.07,0.04,1.00,0.21,0.18,0.22,0.13,0.01,0.62,0.54,0.64,0.48,0.26,0.42,0.42,0.36,0.59,0.39,0.41,0.15,0.65,0.52,0.63,0.44,0.39,0.54,0.43,0.67,0.28,0.65,0.25,0.37,0.66,0.31,0.66,0.00,0.01,0.00,0.00,0.01,0.00,0.58,0.39,0.09,0.00,0.40,0.00 | |
| A,0.84,0.32,0.20,0.21,0.23,0.23,0.96,0.20,0.51,0.24,0.38,0.77,0.77,0.16,0.63,0.58,0.63,0.58,0.38,0.60,0.60,0.22,0.66,0.27,0.46,0.41,0.39,0.58,0.61,0.58,0.63,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,1.00,1.00,0.00,0.00,0.00,1.00,1.00,0.74,1.00,0.71,0.43,0.37,0.37,0.83,0.26,0.19,0.21,0.92,1.00,0.73,1.00,0.71,0.44,0.85,0.95,0.92,0.82,0.23,0.27,0.27,0.15,0.27,0.16,0.17,0.04,1.00,0.96,1.00,0.82,0.39,0.48,0.63,1.00,0.19,1.00,0.30,0.73,1.00,0.26,1.00,0.21,0.10,0.25,0.21,0.10,0.25,1.00,0.40,0.35,0.02,0.64,0.02 | |
| A,0.76,0.51,0.29,0.29,0.28,0.28,0.93,0.15,0.53,0.21,0.46,0.72,0.72,0.11,0.61,0.46,0.59,0.43,0.35,0.58,0.58,0.18,0.68,0.24,0.55,0.31,0.41,0.51,0.58,0.44,0.65,0.30,0.49,0.38,0.54,0.02,0.07,0.07,0.91,0.27,0.14,0.19,0.99,0.71,0.50,0.39,0.55,0.98,0.79,0.73,0.83,0.54,0.33,0.36,0.36,0.72,0.40,0.31,0.30,0.87,0.76,0.74,0.81,0.53,0.33,0.43,0.59,0.49,0.53,0.10,0.23,0.23,0.07,0.22,0.09,0.17,0.02,0.50,0.60,0.53,0.53,0.16,0.44,0.52,0.58,0.15,0.58,0.49,0.35,0.61,0.32,0.61,0.29,0.13,0.32,0.29,0.13,0.32,0.57,0.28,0.20,0.22,0.76,0.20 | |
| A,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.32,0.44,0.33,0.32,1.00,1.00,0.33,1.00,0.95,0.99,0.90,1.00,1.00,1.00,0.33,0.51,0.34,0.39,1.00,0.02,1.00,0.99,0.91,0.00,0.21,0.02,0.19,0.11,0.00,0.00,0.00,1.00,0.01,0.01,0.00,1.00,0.79,0.02,0.20,0.12,1.00,0.72,0.02,0.65,0.25,0.00,0.00,0.00,0.99,0.01,0.01,0.01,1.00,0.72,0.02,0.66,0.25,0.00,0.41,0.29,0.42,0.30,0.04,0.09,0.09,0.23,0.36,0.24,0.23,0.02,0.40,0.42,0.41,0.29,0.05,0.60,0.00,0.29,0.00,0.32,0.00,0.46,0.39,0.00,0.42,0.17,0.00,0.22,0.17,0.00,0.22,0.40,0.06,0.00,0.31,0.88,0.27 | |
| A,0.31,0.94,0.73,0.54,0.66,0.66,0.54,0.10,0.22,0.11,0.15,0.34,0.34,0.03,0.45,0.64,0.48,0.56,0.10,0.22,0.22,0.10,0.28,0.12,0.18,0.07,0.56,0.66,0.49,0.57,0.90,0.73,0.58,0.75,0.96,0.49,0.58,0.58,0.35,0.90,0.72,0.72,0.41,0.41,0.65,0.65,0.97,0.57,0.52,0.33,0.51,0.31,0.19,0.32,0.32,0.22,0.85,0.80,0.54,0.67,0.24,0.17,0.24,0.21,0.09,0.26,0.33,0.29,0.27,0.04,0.16,0.16,0.09,0.18,0.10,0.11,0.01,0.29,0.36,0.31,0.27,0.07,0.23,0.40,0.30,1.00,0.15,0.42,0.30,0.29,0.85,0.19,0.20,0.05,0.25,0.20,0.05,0.25,0.21,0.46,0.17,0.33,0.00,0.44 | |
| A,0.00,1.00,1.00,1.00,1.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,1.00,0.66,0.30,0.63,0.55,0.49,0.64,0.64,0.00,0.63,1.00,1.00,0.00,0.71,0.34,0.33,0.46,0.76,0.69,0.50,0.69,0.54,0.61,0.76,0.76,0.00,1.00,1.00,1.00,0.00,0.34,0.45,0.41,0.42,0.31,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.58,0.06,0.36,0.00,0.77,0.15,0.10,0.60,0.04,0.17,0.26,0.13,0.17,0.26,0.13,0.10,0.21,0.09,0.53,0.61,0.52 | |
| A,0.70,0.68,0.38,0.55,0.42,0.42,0.87,0.19,0.52,0.23,0.43,0.58,0.58,0.11,0.89,0.96,0.90,0.74,0.46,0.52,0.52,0.15,0.34,0.17,0.25,0.24,0.09,0.95,0.92,0.74,0.52,0.87,0.40,0.83,0.68,0.45,0.44,0.44,0.69,0.78,0.43,0.90,0.78,0.16,0.44,0.82,0.68,0.52,0.36,0.29,0.36,0.28,0.04,0.11,0.11,0.73,0.28,0.27,0.35,0.96,0.30,0.33,0.33,0.28,0.04,0.53,0.78,0.61,0.47,0.07,0.12,0.12,0.34,0.88,0.41,0.43,0.04,0.52,0.75,0.58,0.45,0.09,0.41,0.15,0.38,0.42,0.35,0.74,0.46,0.48,0.52,0.46,0.16,0.25,0.15,0.16,0.25,0.15,0.50,0.45,0.20,0.24,0.53,0.21 | |
| A,0.60,0.80,0.48,0.50,0.56,0.56,0.77,0.14,0.34,0.16,0.21,0.44,0.44,0.06,0.49,0.81,0.55,0.59,0.14,0.29,0.29,0.15,0.41,0.18,0.26,0.13,0.54,0.85,0.56,0.59,0.86,0.58,0.90,0.72,0.83,0.12,0.18,0.18,0.84,0.36,0.21,0.33,0.96,0.44,0.92,0.73,0.85,0.87,0.86,0.86,0.92,0.66,0.64,0.64,0.64,0.51,0.63,0.53,0.50,0.58,0.80,0.86,0.88,0.65,0.62,0.46,0.62,0.52,0.46,0.10,0.21,0.21,0.12,0.31,0.14,0.20,0.03,0.52,0.64,0.55,0.46,0.16,0.34,0.70,0.66,0.53,0.61,0.44,0.74,0.79,0.58,0.76,0.37,0.41,0.35,0.37,0.41,0.35,0.92,0.53,0.33,0.23,0.57,0.19 | |
| A,0.39,0.77,0.63,0.50,0.98,0.98,0.40,0.04,0.08,0.05,0.04,0.02,0.02,0.00,0.96,0.27,0.89,0.49,0.02,0.02,0.02,0.04,0.09,0.05,0.05,0.00,0.00,0.26,0.92,0.50,0.98,0.86,0.49,0.84,0.91,1.00,1.00,1.00,0.47,0.70,0.58,0.75,0.18,0.22,0.60,0.80,0.93,0.00,0.72,0.28,0.67,0.33,0.23,0.27,0.27,0.00,0.42,0.90,0.50,0.64,0.41,0.42,0.44,0.30,0.13,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.16,0.23,0.25,0.34,0.24,0.25,0.35,0.34,0.36,0.33,0.16,0.22,0.14,0.16,0.22,0.14,0.34,0.09,0.16,0.34,0.58,0.28 | |
| B,0.68,0.44,0.34,0.19,0.03,0.03,0.99,1.00,1.00,1.00,1.00,0.97,0.97,1.00,0.85,0.50,0.81,0.59,0.15,0.18,0.18,1.00,1.00,1.00,1.00,0.54,0.81,0.39,0.24,0.31,0.96,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,1.00,1.00,0.00,0.00,0.00,1.00,0.75,0.65,0.77,0.46,0.05,0.05,0.05,0.68,0.35,0.33,0.20,0.98,0.71,0.67,0.75,0.47,0.05,1.00,0.61,1.00,0.74,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,0.69,0.83,0.73,0.58,1.00,1.00,0.90,0.40,0.79,0.28,1.00,0.19,0.39,1.00,0.28,0.95,0.98,0.95,0.95,0.98,0.95,0.30,0.62,0.65,0.84,0.78,0.82 | |
| B,0.68,0.60,0.38,0.32,0.27,0.27,0.91,0.34,0.79,0.39,0.59,0.73,0.73,0.25,0.62,0.86,0.66,0.86,0.23,0.37,0.37,0.30,0.92,0.37,0.65,0.34,0.45,0.86,0.62,0.86,0.79,0.68,0.78,0.76,0.94,0.09,0.12,0.12,0.65,0.72,0.44,0.53,0.93,0.38,0.83,0.74,0.96,0.91,0.60,0.76,0.70,0.70,0.19,0.27,0.27,0.69,0.44,0.34,0.33,0.89,0.55,0.78,0.67,0.70,0.18,0.81,0.87,0.87,0.81,0.39,0.48,0.48,0.36,0.78,0.41,0.59,0.17,0.86,0.98,0.89,0.81,0.59,0.51,0.67,0.83,0.73,0.72,0.68,1.00,0.90,0.84,0.84,1.00,1.00,1.00,1.00,1.00,1.00,0.99,1.00,1.00,0.51,0.85,0.45 | |
| B,0.42,0.94,0.65,0.60,0.54,0.54,0.68,0.19,0.52,0.23,0.35,0.46,0.46,0.09,0.82,0.85,0.82,0.76,0.40,0.49,0.49,0.19,0.61,0.23,0.42,0.28,0.18,0.83,0.83,0.77,0.59,0.66,0.41,0.65,0.69,0.34,0.44,0.44,0.57,0.79,0.52,0.70,0.70,0.42,0.49,0.61,0.70,0.67,0.57,0.40,0.56,0.40,0.19,0.29,0.29,0.11,0.67,0.84,0.61,0.66,0.27,0.34,0.31,0.28,0.09,0.17,0.08,0.17,0.12,0.00,0.01,0.01,0.13,0.30,0.15,0.19,0.00,0.15,0.13,0.14,0.08,0.00,0.32,0.24,0.33,0.94,0.21,0.72,0.36,0.39,0.96,0.28,0.47,0.42,0.47,0.47,0.42,0.47,0.36,0.33,0.32,0.52,0.27,0.56 | |
| B,0.72,0.42,0.30,0.30,0.27,0.27,0.92,0.30,0.64,0.34,0.46,0.73,0.73,0.22,1.00,1.00,1.00,1.00,0.77,0.77,0.77,0.30,0.76,0.34,0.56,0.70,0.01,0.98,1.00,1.00,0.22,0.86,0.72,0.89,0.79,0.29,0.29,0.29,0.73,0.47,0.32,0.49,0.88,0.15,0.77,0.89,0.81,0.69,0.30,0.21,0.30,0.19,0.02,0.07,0.07,0.66,0.26,0.32,0.17,0.97,0.22,0.23,0.24,0.17,0.02,0.16,0.16,0.17,0.12,0.00,0.02,0.02,0.17,0.42,0.20,0.20,0.00,0.07,0.09,0.08,0.06,0.00,0.50,0.00,0.28,0.60,0.21,0.50,0.44,0.34,0.62,0.29,0.70,0.47,0.77,0.70,0.47,0.77,0.32,0.32,0.38,0.77,0.54,0.79 | |
| B,0.85,0.27,0.18,0.17,0.23,0.23,0.96,0.21,0.48,0.24,0.34,0.77,0.77,0.17,0.88,0.84,0.87,0.73,0.73,0.83,0.83,0.21,0.58,0.24,0.41,0.52,0.11,0.82,0.88,0.74,0.25,0.93,0.54,0.92,0.75,0.31,0.29,0.29,0.84,0.30,0.20,0.28,0.93,0.05,0.53,0.93,0.76,0.65,0.02,0.01,0.02,0.01,0.00,0.00,0.00,0.99,0.01,0.01,0.01,1.00,0.02,0.01,0.02,0.01,0.00,0.08,0.04,0.08,0.05,0.00,0.01,0.01,0.07,0.14,0.07,0.08,0.00,0.06,0.04,0.06,0.03,0.00,0.48,0.00,0.00,0.23,0.00,0.22,0.00,0.00,0.24,0.00,0.44,0.22,0.51,0.44,0.22,0.51,0.00,0.01,0.00,0.93,0.75,0.90 | |
| B,0.70,0.62,0.37,0.29,0.14,0.14,0.96,0.26,0.50,0.29,0.39,0.86,0.86,0.23,0.74,0.71,0.73,0.75,0.65,0.88,0.88,0.26,0.59,0.29,0.47,0.68,0.28,0.72,0.73,0.75,0.35,1.00,0.68,1.00,0.96,0.21,0.18,0.18,0.70,0.67,0.39,0.49,0.92,0.00,0.67,1.00,0.98,0.77,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.30,0.23,0.30,0.21,0.02,0.06,0.06,0.34,0.39,0.34,0.31,0.02,0.17,0.20,0.18,0.15,0.01,0.52,0.11,0.03,0.29,0.01,0.41,0.18,0.06,0.38,0.04,0.56,0.33,0.63,0.56,0.33,0.63,0.07,0.11,0.14,1.00,0.78,1.00 | |
| B,0.56,0.76,0.50,0.47,0.64,0.64,0.71,0.23,0.59,0.27,0.36,0.36,0.36,0.08,0.52,0.67,0.55,0.71,0.15,0.28,0.28,0.24,0.68,0.28,0.42,0.20,0.52,0.68,0.52,0.72,0.86,0.87,1.00,0.97,1.00,0.14,0.14,0.14,0.57,1.00,0.57,0.77,0.90,0.18,1.00,0.95,1.00,0.85,0.70,0.59,0.72,0.57,0.66,0.82,0.82,0.55,0.56,0.48,0.46,0.51,0.64,0.58,0.67,0.56,0.63,0.45,0.47,0.48,0.37,0.06,0.12,0.13,0.20,0.59,0.25,0.36,0.02,0.44,0.61,0.49,0.37,0.08,0.33,0.67,0.59,0.71,0.53,0.58,0.75,0.67,0.79,0.62,0.85,0.68,0.89,0.85,0.68,0.89,0.71,0.65,0.67,0.59,0.62,0.53 | |
| B,0.68,0.43,0.34,0.30,0.92,0.91,0.70,0.10,0.31,0.13,0.16,0.09,0.09,0.01,0.49,0.91,0.58,0.56,0.03,0.07,0.07,0.12,0.40,0.15,0.19,0.02,0.50,0.88,0.59,0.56,0.97,0.43,0.39,0.45,0.53,0.29,0.57,0.57,0.68,0.45,0.36,0.41,0.71,0.63,0.43,0.42,0.54,0.73,0.40,0.48,0.46,0.49,0.33,0.71,0.71,0.67,0.35,0.34,0.31,0.69,0.32,0.51,0.42,0.49,0.28,0.30,0.27,0.31,0.22,0.01,0.03,0.03,0.06,0.17,0.08,0.08,0.00,0.35,0.26,0.33,0.22,0.01,0.28,0.54,0.04,0.03,0.07,0.16,0.42,0.13,0.06,0.15,0.46,0.11,0.57,0.46,0.11,0.57,0.15,0.00,0.01,0.88,1.00,0.83 | |
| B,0.75,0.34,0.27,0.23,0.32,0.32,0.92,0.28,0.80,0.35,0.63,0.68,0.68,0.20,0.43,0.61,0.46,0.49,0.07,0.17,0.17,0.28,0.58,0.30,0.40,0.14,0.65,0.63,0.42,0.46,0.94,0.26,0.10,0.25,0.16,0.00,0.02,0.02,0.76,0.24,0.25,0.19,0.99,0.79,0.15,0.21,0.15,1.00,0.89,0.83,0.93,0.68,0.50,0.48,0.48,0.74,0.28,0.26,0.24,0.83,0.87,0.83,0.93,0.69,0.51,0.59,0.70,0.64,0.79,0.38,0.65,0.65,0.28,0.84,0.36,0.63,0.18,0.61,0.72,0.64,0.79,0.57,0.48,0.89,0.40,0.57,0.32,0.62,0.58,0.46,0.69,0.41,0.81,0.62,0.87,0.81,0.62,0.87,0.45,0.49,0.41,0.72,0.71,0.71 | |
| B,0.59,0.52,0.43,0.32,0.80,0.80,0.66,0.14,0.41,0.18,0.26,0.20,0.20,0.03,0.48,0.51,0.48,0.46,0.09,0.19,0.19,0.14,0.52,0.18,0.31,0.08,0.54,0.54,0.48,0.46,0.91,0.69,0.57,0.71,0.78,0.60,0.75,0.75,0.59,0.56,0.45,0.52,0.52,0.35,0.60,0.69,0.76,0.38,0.44,0.31,0.44,0.32,0.16,0.31,0.31,0.55,0.40,0.44,0.33,0.82,0.34,0.36,0.37,0.29,0.13,0.17,0.14,0.17,0.11,0.01,0.03,0.03,0.13,0.20,0.13,0.11,0.00,0.15,0.17,0.15,0.11,0.01,0.28,0.33,0.20,0.30,0.17,0.30,0.22,0.21,0.35,0.20,0.59,0.28,0.69,0.59,0.28,0.69,0.17,0.19,0.15,0.78,0.70,0.78 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| // Author: DonaldIsFreak | |
| // Practice to programming Naive Bayes. | |
| define("MAX_CATEGORY",10); | |
| $category = array(); | |
| $num_category = 0; | |
| $num_samples = 0; | |
| $num_columns = 0; | |
// Arithmetic mean of an array of numbers.
function mean($aValues){
    $total = array_sum($aValues);
    return $total / count($aValues);
}
// Squared deviation of $x from $mean.
function sd_square($x,$mean){
    $diff = $x - $mean;
    return $diff * $diff;
}
// Sample variance (denominator n - 1) of an array of numbers.
function vars($aValues){
    $m = mean($aValues);
    $total = 0;
    foreach ($aValues as $v) {
        $total += sd_square($v, $m);
    }
    return $total / (count($aValues) - 1);
}
// Sample standard deviation: square root of the sample variance.
function sd($aValues){
    $variance = vars($aValues);
    return sqrt($variance);
}
// Build a predicate that keeps rows whose first field equals $category.
function categoryFilter($category=""){
    return function ($row) use ($category) {
        return $row[0] == $category;
    };
}
// Predicate: row belongs to class "A".
function maleFilter($aValues){
    return $aValues[0] == "A";
}
// Predicate: row belongs to class "B".
function femaleFilter($aValues){
    return $aValues[0] == "B";
}
// Extract column $index from a list of rows.
function getCol($aValues,$index){
    $column = array();
    foreach ($aValues as $row) {
        $column[] = $row[$index];
    }
    return $column;
}
/**
 * Read a CSV dataset (class label in column 0) and record its dimensions.
 *
 * Side effects: appends each distinct label to the global $category array
 * and sets $num_samples, $num_columns and $num_category.
 *
 * @param string $fName path to the CSV file.
 * @return array one entry per sample row, each an array of string fields.
 */
function readCSVFile($fName){
    global $category;
    $data = array();
    $fp = fopen($fName,'r');
    while (!feof($fp)){
        $temp = fgetcsv($fp);
        // BUGFIX: fgetcsv() returns false on a blank/EOF line; the original
        // pushed that false into $data and corrupted the sample count.
        if ($temp === false) {
            continue;
        }
        // BUGFIX: the original compared against a never-updated "NONE"
        // sentinel, so every row's label was appended to $category.
        // Collect each distinct label exactly once instead.
        if (!in_array($temp[0], $category)) {
            array_push($category, $temp[0]);
        }
        array_push($data, $temp);
    }
    fclose($fp);
    $GLOBALS["num_samples"] = count($data);
    $GLOBALS["num_columns"] = count($data[0]);
    $GLOBALS["num_category"] = count($category);
    return $data;
}
/**
 * Build per-class Gaussian models from the training rows.
 *
 * Each model is an array whose index 0 holds the class prior and whose
 * indexes 1..num_columns-1 hold array(mean, variance) for that feature.
 * "male"/"female" keep the original naming for class "A" and class "B".
 *
 * @param array $aValue training rows (label at index 0).
 * @return array array($male_model, $female_model).
 */
function trainClassifier($aValue){
    global $num_columns,$num_samples;
    $male_temp = array_filter($aValue,'maleFilter');
    $female_temp = array_filter($aValue,'femaleFilter');
    $male = array(count($male_temp)/$num_samples);
    $female = array(count($female_temp)/$num_samples);
    for ($i=1; $i<$num_columns; $i++){
        $male_cols = getCol($male_temp,$i);
        $female_cols = getCol($female_temp,$i);
        array_push($male, array(mean($male_cols), vars($male_cols)));
        // BUGFIX: the original read mean($female_cols, vars($female_cols))
        // — a misplaced parenthesis that stored a one-element array with no
        // variance. Store the (mean, variance) pair like the male model.
        array_push($female, array(mean($female_cols), vars($female_cols)));
    }
    return array($male,$female);
}
// Gaussian probability density of $x under N($mean, $vars).
function gaussProbability($mean,$vars,$x){
    $diff = $x - $mean;
    $exponent = exp(-($diff * $diff) / (2 * $vars));
    return $exponent / sqrt(2 * M_PI * $vars);
}
/**
 * Score a sample against both trained class models and pick the winner.
 *
 * NOTE(review): the original body was an empty stub; this fills it in using
 * the model layout produced by trainClassifier().
 *
 * @param array $training array($male_model, $female_model) from trainClassifier().
 * @param array $sample one data row (feature values at indexes 1..num_columns-1).
 * @return int 0 when class "A" is more probable, 1 for class "B".
 */
function runClassifier($training,$sample){
    global $num_columns;
    $best = 0;
    $bestProb = -1;
    for ($i=0; $i<2; $i++){
        // Start from the class prior, then multiply in each feature's density.
        $prob = $training[$i][0];
        for ($j = 1; $j < $num_columns; $j++) {
            $prob *= gaussProbability($training[$i][$j][0], $training[$i][$j][1], $sample[$j]);
        }
        if ($prob > $bestProb) {
            $bestProb = $prob;
            $best = $i;
        }
    }
    return $best;
}
// Train on the bundled dataset and print one Gaussian density as a smoke test.
$data = readCSVFile('data.txt');
$training = trainClassifier($data);
// $training[0][1] holds (mean, variance) of feature 1 for class "A".
echo gaussProbability($training[0][1][0],$training[0][1][1],6);
?>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #Author: Krishnamurthy Koduvayur Viswanathan | |
| from __future__ import division | |
| import collections | |
| import math | |
class Model:
    """Naive Bayes classifier for categorical ARFF data.

    The last attribute in the ARFF file is treated as the class label.
    Feature counts start at 1 (Laplace smoothing); the label counts are
    inflated in TrainClassifier so the smoothed ratios stay consistent.
    """

    def __init__(self, arffFile):
        self.trainingFile = arffFile
        # feature name -> list of its possible values (includes the label).
        self.features = {}
        # Preserves the attribute order of the arff header.
        self.featureNameList = []
        # (label, feature_name, feature_value) -> smoothed count (starts at 1).
        self.featureCounts = collections.defaultdict(lambda: 1)
        # Each vector holds the feature values with the label as last entry.
        self.featureVectors = []
        # label -> number of training vectors; smoothed in TrainClassifier.
        self.labelCounts = collections.defaultdict(lambda: 0)

    def TrainClassifier(self):
        """Accumulate label counts and (label, feature, value) counts."""
        for fv in self.featureVectors:
            label = fv[len(fv) - 1]
            self.labelCounts[label] += 1
            for counter in range(0, len(fv) - 1):
                self.featureCounts[(label, self.featureNameList[counter], fv[counter])] += 1
        # Smoothing: every (feature, value) count was seeded at 1, so each
        # label's denominator grows by the size of each feature's domain.
        # Remember that the last feature is actually the label.
        for label in self.labelCounts:
            for feature in self.featureNameList[:len(self.featureNameList) - 1]:
                self.labelCounts[label] += len(self.features[feature])

    def Classify(self, featureVector):
        """Return the most probable label for a list of feature values.

        featureVector lists values in the same order as featureNameList
        (label excluded).
        """
        probabilityPerLabel = {}
        totalLabelCount = sum(self.labelCounts.values())
        for label in self.labelCounts:
            logProb = 0
            # BUGFIX: the original used featureVector.index(featureValue),
            # which maps duplicate values to the first matching position and
            # thus to the wrong feature name. enumerate() pairs each value
            # with its own position.
            for position, featureValue in enumerate(featureVector):
                featureName = self.featureNameList[position]
                logProb += math.log(self.featureCounts[(label, featureName, featureValue)] / self.labelCounts[label])
            probabilityPerLabel[label] = (self.labelCounts[label] / totalLabelCount) * math.exp(logProb)
        print(probabilityPerLabel)
        return max(probabilityPerLabel, key=lambda classLabel: probabilityPerLabel[classLabel])

    def GetValues(self):
        """Parse the arff training file into features and featureVectors."""
        # with-block replaces the original open/close pair and avoids
        # shadowing the builtin name `file`.
        with open(self.trainingFile, 'r') as arff:
            for line in arff:
                if line[0] != '@':  # start of actual data
                    # Data rows are compared case-insensitively.
                    self.featureVectors.append(line.strip().lower().split(','))
                else:  # feature definitions: skip @relation and @data
                    if line.strip().lower().find('@data') == -1 and (not line.lower().startswith('@relation')):
                        name = line.strip().split()[1]
                        self.featureNameList.append(name)
                        self.features[name] = [featureName.strip() for featureName in line[line.find('{') + 1: line.find('}')].strip().split(',')]

    def TestClassifier(self, arffFile):
        """Classify each data row of an arff file, printing predicted vs actual."""
        with open(arffFile, 'r') as arff:
            for line in arff:
                if line[0] != '@':
                    vector = line.strip().lower().split(',')
                    # BUGFIX: the last entry is the true label, not a feature;
                    # the original passed the full vector to Classify and so
                    # used the answer as evidence.
                    prediction = self.Classify(vector[:len(vector) - 1])
                    print("classifier: " + prediction + " given " + vector[len(vector) - 1])
| if __name__ == "__main__": | |
| model = Model("tennis.arff") | |
| model.GetValues() | |
| model.TrainClassifier() | |
| model.TestClassifier("tennis.arff") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| @RELATION TENNIS | |
| @ATTRIBUTE outlook {sunny, overcast, rain} | |
| @ATTRIBUTE temperature {hot, mild, cool} | |
| @ATTRIBUTE humidity {high, normal, low} | |
| @ATTRIBUTE wind {weak, strong} | |
| @ATTRIBUTE play {yes, no} | |
| @DATA | |
| Sunny,Hot,High,Weak,No | |
| Sunny,Hot,High,Strong,No | |
| Overcast,Hot,High,Weak,Yes | |
| Rain,Mild,High,Weak,Yes | |
| Rain,Cool,Normal,Weak,Yes | |
| Rain,Cool,Normal,Strong,No | |
| Overcast,Cool,Normal,Strong,Yes | |
| Sunny,Mild,High,Weak,No | |
| Sunny,Cool,Normal,Weak,Yes | |
| Rain,Mild,Normal,Weak,Yes | |
| Sunny,Mild,Normal,Strong,Yes | |
| Overcast,Mild,High,Strong,Yes | |
| Overcast,Hot,Normal,Weak,Yes | |
| Rain,Mild,High,Strong,No |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment