{
 "metadata": {
  "name": "PCA Demo"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "raw",
     "metadata": {},
     "source": "The example is from http://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_iris.html#example-decomposition-plot-pca-iris-py"
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "import pylab as pl\nfrom mpl_toolkits.mplot3d import Axes3D\nfrom sklearn import datasets\nfrom sklearn.decomposition import PCA",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 23
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": "Import the Iris dataset. For more on the Iris dataset, see http://en.wikipedia.org/wiki/Iris_flower_data_set. Basically, we have three classes of irises, and four features for each flower."
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# Load the Iris data set; only the first two features\n# (sepal length and sepal width) are kept for the 2-D scatter plot.\niris = datasets.load_iris()\nX = iris.data[:, :2]\nY = iris.target\n\n# Axis limits: the observed range of each kept feature, padded by 0.5.\nx_min = X[:, 0].min() - .5\nx_max = X[:, 0].max() + .5\ny_min = X[:, 1].min() - .5\ny_max = X[:, 1].max() + .5",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 24
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": "Make the first figure"
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# Make figure number 2 the current figure (figsize=(8, 6)) and\n# clear anything already drawn on it.\npl.figure(num=2, figsize=(8, 6))\npl.clf()",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 25
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# Scatter the two kept features against each other; the point colour\n# encodes the class label Y.\npl.scatter(X[:, 0], X[:, 1], c=Y, cmap=pl.cm.Paired)\n\n# Horizontal axis: label, padded data range, no ticks.\npl.xlabel('Sepal length')\npl.xlim(x_min, x_max)\npl.xticks(())\n\n# Vertical axis: label, padded data range, no ticks.\npl.ylabel('Sepal width')\npl.ylim(y_min, y_max)\npl.yticks(())\n\npl.show()",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 26
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": "Plot the training points. Notice that the blue iris class is pretty much linearly separable from the two reddish classes."
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# To get a better understanding of how the dimensions interact,\n# project the data onto its first three principal components.\nfig = pl.figure(1, figsize=(8, 6))\n# Create the 3-D axes through the figure so that they are attached to it;\n# constructing Axes3D(fig) directly no longer adds the axes to the figure\n# in recent Matplotlib releases. The '3d' projection is registered by the\n# mpl_toolkits.mplot3d import in the first code cell.\nax = fig.add_subplot(111, projection='3d', elev=-150, azim=110)\n# Note that PCA is fitted on the four features in iris.data, not on the\n# target Y.\nX_reduced = PCA(n_components=3).fit_transform(iris.data)",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 27
    },
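    {
     "cell_type": "raw",
     "metadata": {},
     "source": "Let's plot the transformed data. Unusually, this plot shows one more dimension than the previous figure rather than fewer: the earlier scatter plot used only two of the four features, whereas here PCA reduces all four features to three components. It's for demonstration, but also, we have three unobserved groups or clusters, the three types of Iris."
    },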
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "# Draw on the 3-D axes `ax` created in the previous code cell; the point\n# colour encodes the class label Y.\nax.scatter(X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], c=Y,\n           cmap=pl.cm.Paired)\nax.set_title(\"First three PCA directions\")\n# Label each axis and hide its tick labels. The plain xaxis/yaxis/zaxis\n# attributes are used because the w_xaxis/w_yaxis/w_zaxis aliases are\n# deprecated and have been removed from recent Matplotlib releases.\nax.set_xlabel(\"1st eigenvector\")\nax.xaxis.set_ticklabels([])\nax.set_ylabel(\"2nd eigenvector\")\nax.yaxis.set_ticklabels([])\nax.set_zlabel(\"3rd eigenvector\")\nax.zaxis.set_ticklabels([])\npl.show()",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 28
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 28
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": "",
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 28
    }
   ],
   "metadata": {}
  }
 ]
}