Perceptron Toy Example

by allenlu2007

 

本文參考:  The Nature of Code, Chapter 10. Neural Networks, by Daniel Shiffman

前文提到 neural net 的三階段: Perceptron, shallow neural net with back propagation, and deep neural net 

先從最簡單的 perceptron 開始:

NewImage

output = f(input0 * weight0 + input1 * weight1)    f 一般是 nonlinear function 例如 sign, tanh, or sigmoid.  

此處 (Perceptron) 假設是 sign function, 意即取值後的 sign.  + 為 +1;  - 為 -1, binary output state

 

首先看 forward prop

NewImage 

NewImage

Sum = 6 + (-4) = 2

NewImage

 

加上 bias to avoid on the boundary 如下:

 

NewImage

 

再來看 backward prop (learning or find the weight)

下一步要回答是如何找到 weight? 就是類似 gradient descent 的方式。(What’s the gradient of sgn function?)

從另一個角度 output = sgn( W * X)  where W = (w1, w2, wb) and X = (x1, x2, 1)  (對應下面 code: bias input 固定為 1, bias weight wb 是 learn 出來的)

最後 learn or train 出來的 W 可以說是 filter.

 

一般先用 random initial conditions for weight, 如下 code (Processing/Java):

NewImage

 

再來就是用 gradient descent 來收斂 weight.

NewImage

 

NewImage

NewImage

NewImage

 

 

 

 

完整的 code (使用 processing 平台) 可以從 Nature of Code 的 Github download.

NewImage

 

// The Nature of Code

// Daniel Shiffman

// http://natureofcode.com

 

// Simple Perceptron Example

// See: http://en.wikipedia.org/wiki/Perceptron

 

// Code based on text “Artificial Intelligence”, George Luger

 

// A list of points we will use to “train” the perceptron

Trainer[] training = new Trainer[2000];

// A Perceptron object

Perceptron ptron;

 

// We will train the perceptron with one “Point” object at a time

int count = 0;

 

// Coordinate space

float xmin = -400;

float ymin = -100;

float xmax =  400;

float ymax =  100;

 

// The function to describe a line 

float f(float x) {

  return 0.4*x+1;

}

 

void setup() {

  size(640, 360);

 

  // The perceptron has 3 inputs — x, y, and bias

  // Second value is “Learning Constant”

  ptron = new Perceptron(3, 0.00001);  // Learning Constant is low just b/c it’s fun to watch, this is not necessarily optimal

 

  // Create a random set of training points and calculate the “known” answer

  for (int i = 0; i < training.length; i++) {

    float x = random(xmin, xmax);

    float y = random(ymin, ymax);

    int answer = 1;

    if (y < f(x)) answer = -1;

    training[i] = new Trainer(x, y, answer);

  }

  smooth();

}

 

 

void draw() {

  background(255);

  translate(width/2,height/2);

 

  // Draw the line

  strokeWeight(4);

  stroke(127);

  float x1 = xmin;

  float y1 = f(x1);

  float x2 = xmax;

  float y2 = f(x2);

  line(x1,y1,x2,y2);

 

  // Draw the line based on the current weights

  // Formula is weights[0]*x + weights[1]*y + weights[2] = 0

  stroke(0);

  strokeWeight(1);

  float[] weights = ptron.getWeights();

  x1 = xmin;

  y1 = (-weights[2] – weights[0]*x1)/weights[1];

  x2 = xmax;

  y2 = (-weights[2] – weights[0]*x2)/weights[1];

  line(x1,y1,x2,y2);

 

 

 

  // Train the Perceptron with one “training” point at a time

  ptron.train(training[count].inputs, training[count].answer);

  count = (count + 1) % training.length;

 

  // Draw all the points based on what the Perceptron would “guess”

  // Does not use the “known” correct answer

  for (int i = 0; i < count; i++) {

    stroke(0);

    strokeWeight(1);

    fill(0);

    int guess = ptron.feedforward(training[i].inputs);

    if (guess > 0) noFill();

 

    ellipse(training[i].inputs[0], training[i].inputs[1], 8, 8);

  }

}

 

 

 

// The Nature of Code

// Daniel Shiffman

// http://natureofcode.com

 

// Simple Perceptron Example

// See: http://en.wikipedia.org/wiki/Perceptron

 

// A class to describe a training point

// Has an x and y, a “bias” (1) and known output

// Could also add a variable for “guess” but not required here

 

class Trainer {

  

  float[] inputs;

  int answer; 

  

  Trainer(float x, float y, int a) {

    inputs = new float[3];

    inputs[0] = x;

    inputs[1] = y;

    inputs[2] = 1;

    answer = a;

  }

}

 

// Daniel Shiffman

// The Nature of Code

// http://natureofcode.com

 

// Simple Perceptron Example

// See: http://en.wikipedia.org/wiki/Perceptron

 

// Perceptron Class

 

class Perceptron {

  float[] weights;  // Array of weights for inputs

  float c;          // learning constant

 

  // Perceptron is created with n weights and learning constant

  Perceptron(int n, float c_) {

    weights = new float[n];

    // Start with random weights

    for (int i = 0; i < weights.length; i++) {

      weights[i] = random(-1,1); 

    }

    c = c_;

  }

 

  // Function to train the Perceptron

  // Weights are adjusted based on “desired” answer

  void train(float[] inputs, int desired) {

    // Guess the result

    int guess = feedforward(inputs);

    // Compute the factor for changing the weight based on the error

    // Error = desired output – guessed output

    // Note this can only be 0, -2, or 2

    // Multiply by learning constant

    float error = desired – guess;

    // Adjust weights based on weightChange * input

    for (int i = 0; i < weights.length; i++) {

      weights[i] += c * error * inputs[i];         

    }

  }

 

  // Guess -1 or 1 based on input values

  int feedforward(float[] inputs) {

    // Sum all values

    float sum = 0;

    for (int i = 0; i < weights.length; i++) {

      sum += inputs[i]*weights[i];

    }

    // Result is sign of the sum, -1 or 1

    return activate(sum);

  }

  

  int activate(float sum) {

    if (sum > 0) return 1;

    else return -1; 

  }

  

  // Return weights

  float[] getWeights() {

    return weights; 

  }

}

 

 

https://processing.org/

Advertisements