### Perceptron Toy Example

#### by allenlu2007

本文參考: The Nature of Code, Chapter 10. Neural Networks, by Daniel Shiffman

前文提到 neural net 的三階段: Perceptron, shallow neural net with back propagation, and deep neural net

先從最簡單的 perceptron 開始:

output = f(input0 * weight0 + input1 * weight1) f 一般是 nonlinear function 例如 sign, tanh, or sigmoid.

此處 (Perceptron) 假設是 sign function, 意即取加權和 (weighted sum) 的 sign: 正為 +1; 負為 -1, binary output state

## 首先看 forward prop

Sum = 6 + (-4) = 2

加上 bias to avoid on the boundary 如下:

## 再來看 backward prop (learning or find the weight)

下一步要回答是如何找到 weight? 就是類似 gradient descent 的方式。(What’s the gradient of sgn function?)

從另一個角度 output = sgn( W * X) where W = (w1, w2, b) and X = (x1, x2, 1) — bias weight b 放在 W 中, input vector X 的最後一項固定為 1 (對應下方 code 的 inputs[2] = 1)

最後 learn or train 出來的 W 可以說是 filter.

一般先用 random initial conditions for weight, 如下 code (Processing/Java):

再來就是用 gradient descent 來收斂 weight.

完整的 code (使用 processing 平台) 可以從 Nature of Code 的 Github download.

// The Nature of Code

// Daniel Shiffman

// Simple Perceptron Example

// See: http://en.wikipedia.org/wiki/Perceptron

// Code based on text “Artificial Intelligence”, George Luger

// A list of points we will use to “train” the perceptron

Trainer[] training = new Trainer[2000];

// A Perceptron object

Perceptron ptron;

// We will train the perceptron with one "Trainer" point per draw() frame;
// count is the index of the next training sample to feed in.

int count = 0;

// Coordinate space: points are generated in [xmin, xmax] x [ymin, ymax],
// drawn relative to an origin translated to the window center in draw().

float xmin = -400;

float ymin = -100;

float xmax = 400;

float ymax = 100;

// The function to describe a line

// The target line the perceptron will try to learn: y = 0.4x + 1.
float f(float x) {
  float y = 0.4*x + 1;
  return y;
}

void setup() {
  size(640, 360);

  // The perceptron takes 3 inputs: x, y, and a bias.
  // The learning constant is deliberately tiny — slow convergence is
  // more fun to watch; it is not necessarily an optimal choice.
  ptron = new Perceptron(3, 0.00001);

  // Build a random training set, labeling each point by which side
  // of the known line y = f(x) it falls on.
  for (int idx = 0; idx < training.length; idx++) {
    float px = random(xmin, xmax);
    float py = random(ymin, ymax);
    int label = (py < f(px)) ? -1 : 1;
    training[idx] = new Trainer(px, py, label);
  }

  smooth();
}

void draw() {

background(255);

translate(width/2,height/2);

// Draw the line

strokeWeight(4);

stroke(127);

float x1 = xmin;

float y1 = f(x1);

float x2 = xmax;

float y2 = f(x2);

line(x1,y1,x2,y2);

// Draw the line based on the current weights

// Formula is weights[0]*x + weights[1]*y + weights[2] = 0

stroke(0);

strokeWeight(1);

float[] weights = ptron.getWeights();

x1 = xmin;

y1 = (-weights[2] – weights[0]*x1)/weights[1];

x2 = xmax;

y2 = (-weights[2] – weights[0]*x2)/weights[1];

line(x1,y1,x2,y2);

// Train the Perceptron with one “training” point at a time

ptron.train(training[count].inputs, training[count].answer);

count = (count + 1) % training.length;

// Draw all the points based on what the Perceptron would “guess”

// Does not use the “known” correct answer

for (int i = 0; i < count; i++) {

stroke(0);

strokeWeight(1);

fill(0);

int guess = ptron.feedforward(training[i].inputs);

if (guess > 0) noFill();

ellipse(training[i].inputs[0], training[i].inputs[1], 8, 8);

}

}

// The Nature of Code

// Daniel Shiffman

// Simple Perceptron Example

// See: http://en.wikipedia.org/wiki/Perceptron

// A class to describe a training point

// Has an x and y, a “bias” (1) and known output

// Could also add a variable for “guess” but not required here

// A single labeled training point: an (x, y) position, a constant
// bias input of 1, and the known correct output (+1 or -1).
// A "guess" field could be added, but is not needed here.
class Trainer {

  float[] inputs;  // {x, y, bias}
  int answer;      // known output for this point

  Trainer(float x, float y, int a) {
    inputs = new float[] { x, y, 1 };
    answer = a;
  }
}

// Daniel Shiffman

// The Nature of Code

// Simple Perceptron Example

// See: http://en.wikipedia.org/wiki/Perceptron

// Perceptron Class

class Perceptron {

float[] weights; // Array of weights for inputs

float c; // learning constant

// Perceptron is created with n weights and learning constant

Perceptron(int n, float c_) {

weights = new float[n];

// Start with random weights

for (int i = 0; i < weights.length; i++) {

weights[i] = random(-1,1);

}

c = c_;

}

// Function to train the Perceptron

// Weights are adjusted based on “desired” answer

void train(float[] inputs, int desired) {

// Guess the result

int guess = feedforward(inputs);

// Compute the factor for changing the weight based on the error

// Error = desired output – guessed output

// Note this can only be 0, -2, or 2

// Multiply by learning constant

float error = desired – guess;

// Adjust weights based on weightChange * input

for (int i = 0; i < weights.length; i++) {

weights[i] += c * error * inputs[i];

}

}

// Guess -1 or 1 based on input values

int feedforward(float[] inputs) {

// Sum all values

float sum = 0;

for (int i = 0; i < weights.length; i++) {

sum += inputs[i]*weights[i];

}

// Result is sign of the sum, -1 or 1

return activate(sum);

}

int activate(float sum) {

if (sum > 0) return 1;

else return -1;

}

// Return weights

float[] getWeights() {

return weights;

}

}