Commit 87451071 by taishi

Merge branch 'feature/add-lab5' into 'master'

Adding lab 5 and lab 6

See merge request !1
parents e7347cf7 2e35c0d0
.DS_Store
build
.vscode
__pycache__
import numpy as np


def scale(X, x_min, x_max):
    nom = (X - X.min(axis=0)) * (x_max - x_min)
    denom = X.max(axis=0) - X.min(axis=0)
    denom[denom == 0] = 1
    return x_min + nom / denom
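
# Illustrative usage of scale (column-wise min-max scaling to [x_min, x_max]):
# scale(np.array([[20, 40, 30], [45, 35, 25]]), 0, 1)
# -> array([[0., 1., 1.],
#           [1., 0., 0.]])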

def data_normalize(raw_data):
    """
    Receive raw training data and return the normalized data and an array of the maximum value of each column.

    Args:
        raw_data: raw training data
    Returns:
        train_x: normalized data
        max_values: array that contains the maximum value of each column
    """
    # TODO 3: implement this method.
    norm_data = None
    max_values = None
    return norm_data, max_values
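
# A hedged sketch of TODO 3 (not the graded answer): one plausible reading of the
# docstring is to divide each column by its maximum so values fall in [0, 1].
# The name data_normalize_example is hypothetical, so the stub above stays untouched.
def data_normalize_example(raw_data):
    max_values = raw_data.max(axis=0)   # per-column maxima
    norm_data = raw_data / max_values   # scale every column into [0, 1]
    return norm_data, max_values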

def data_normalize_prediction(raw_data, max_values):
    # Scale the prediction input by the column maxima computed from the training data.
    norm_data = raw_data / max_values
    return norm_data

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def relu(Z):
    # TODO 4: implement relu function.
    return None
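
# A hedged sketch of TODO 4 (not the graded answer). The name relu_example is
# hypothetical, so the stub above stays untouched.
def relu_example(Z):
    # element-wise max(0, z)
    return np.maximum(0, Z)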

def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    """Perform single-layer forward propagation.

    Args:
        A_prev (np.ndarray): activation matrix of the previous layer
        W_curr (np.ndarray): weight matrix of the current layer
        b_curr (np.ndarray): bias vector of the current layer
        activation (str, optional): specifies either the relu or the sigmoid activation function
    Returns:
        A_curr: calculated activation matrix A
        Z_curr: intermediate matrix Z
    """
    # TODO 5: implement this function.
    # calculation of the input value for the activation function
    Z_curr = None
    # selection of the activation function
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
    # return the calculated activation A and the intermediate Z matrix
    A_curr = None
    return A_curr, Z_curr
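
# A hedged sketch of TODO 5 (not the graded answer). Assumes the column-per-example
# convention used by the rest of the file, i.e. A_prev has shape (n_prev, m).
def single_layer_forward_propagation_example(A_prev, W_curr, b_curr, activation="relu"):
    # affine transform: Z = W . A_prev + b
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    # apply the chosen non-linearity
    if activation == "relu":
        A_curr = np.maximum(0, Z_curr)
    elif activation == "sigmoid":
        A_curr = 1 / (1 + np.exp(-Z_curr))
    else:
        raise Exception('Non-supported activation function')
    return A_curr, Z_curr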

def full_forward_propagation(X, params_values):
    """Perform full forward propagation for input X using params_values, which stores the weight and bias matrices.

    Args:
        X (np.ndarray): input matrix X
        params_values (dict): weight and bias matrices stored in a dictionary
    Returns:
        A3: output of the network
        memory: the Z and A matrices of each layer, stored as a list of dictionaries
    """
    # TODO 6: implement this method.
    # Call single_layer_forward_propagation() three times with the correct parameters,
    # then create a memory list with all intermediate matrices A1, Z1, A2, Z2, A3, Z3 and return it.
    A1, Z1 = None
    A2, Z2 = None
    A3, Z3 = None
    memory = [
        {"A1": A1},
        {"Z1": Z1},
        {"A2": A2},
        {"Z2": Z2},
        {"A3": A3},
        {"Z3": Z3},
    ]
    return A3, memory
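
# A hedged sketch of TODO 6 (not the graded answer). Assumes X holds one example per
# row (as raw_x does in the training script), so it is transposed into the
# column-per-example layout, and that the hidden layers use relu and the output layer sigmoid.
def full_forward_propagation_example(X, params_values):
    A0 = X.T
    A1, Z1 = single_layer_forward_propagation_example(A0, params_values["W1"], params_values["b1"], "relu")
    A2, Z2 = single_layer_forward_propagation_example(A1, params_values["W2"], params_values["b2"], "relu")
    A3, Z3 = single_layer_forward_propagation_example(A2, params_values["W3"], params_values["b3"], "sigmoid")
    memory = [{"A1": A1}, {"Z1": Z1}, {"A2": A2}, {"Z2": Z2}, {"A3": A3}, {"Z3": Z3}]
    return A3, memory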

def get_cost_value(Y_hat, Y):
    # number of examples
    m = Y_hat.shape[1]
    # calculation of the binary cross-entropy cost
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)

def sigmoid_backward(dA, Z):
    # chain rule: dZ = dA * sigmoid(Z) * (1 - sigmoid(Z))
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backward(dA, Z):
    # TODO 8: Implement derivative of relu function
    dZ = None
    return dZ
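
# A hedged sketch of TODO 8 (not the graded answer): the relu derivative is 1 where
# Z > 0 and 0 elsewhere, so the upstream gradient is simply masked.
def relu_backward_example(dA, Z):
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ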

def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    """Perform single-layer backpropagation.

    Args:
        dA_curr (np.ndarray): gradient of the cost with respect to A in the current layer
        W_curr (np.ndarray): weight matrix of the current layer
        b_curr (np.ndarray): bias vector of the current layer
        Z_curr (np.ndarray): Z matrix stored for the current layer
        A_prev (np.ndarray): A matrix of the previous layer
        activation (str, optional): defines the activation function, either sigmoid or relu
    Returns:
        dA_prev (np.ndarray): gradient with respect to A in the previous layer
        dW_curr (np.ndarray): gradient with respect to W in the current layer
        db_curr (np.ndarray): gradient with respect to b in the current layer
    """
    # TODO 9: Implement this function.
    # number of examples
    m = A_prev.shape[1]
    # selection of the activation function
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Exception('Non-supported activation function')
    # calculation of the activation function derivative
    dZ_curr = None
    # derivative of the matrix W
    dW_curr = None
    # derivative of the vector b
    db_curr = None
    # derivative of the matrix A_prev
    dA_prev = None
    return dA_prev, dW_curr, db_curr
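
# A hedged sketch of TODO 9 (not the graded answer), following the standard
# vectorized backpropagation equations for one dense layer.
def single_layer_backward_propagation_example(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    m = A_prev.shape[1]
    if activation == "relu":
        dZ_curr = relu_backward_example(dA_curr, Z_curr)
    elif activation == "sigmoid":
        dZ_curr = sigmoid_backward(dA_curr, Z_curr)
    else:
        raise Exception('Non-supported activation function')
    dW_curr = np.dot(dZ_curr, A_prev.T) / m                 # gradient w.r.t. the weights
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m    # gradient w.r.t. the bias
    dA_prev = np.dot(W_curr.T, dZ_curr)                     # gradient passed to the previous layer
    return dA_prev, dW_curr, db_curr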
import numpy as np
import matplotlib.pyplot as plt
from dnn_lib import *
learning_rate=0.075
num_iterations=10
# TODO 1: Create the input and output data sets. Data size > 1.
# Replace None with your code. For example:
# raw_x = np.array([[20, 40, 30],
#                   [45, 35, 25]])
# raw_y = np.array([[1],
#                   [0]])
raw_x = None
raw_y = None
INPUT_SIZE=3
HID_LAYER1=5
HID_LAYER2=4
OUTPUT_SIZE=1
np.random.seed(10)
# TODO 2: Initialize weights and biases for all connections. Weights should be
# initialized randomly and biases to zeros. Use the numpy library to create the
# weight and bias arrays, and replace None with your code.
W1 = None
W2 = None
W3 = None
b1 = None
b2 = None
b3 = None
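
# A hedged sketch of TODO 2 (one common choice, not the graded answer), assuming the
# column-per-example layout with W of shape (layer_out, layer_in):
# W1 = np.random.randn(HID_LAYER1, INPUT_SIZE) * 0.1
# W2 = np.random.randn(HID_LAYER2, HID_LAYER1) * 0.1
# W3 = np.random.randn(OUTPUT_SIZE, HID_LAYER2) * 0.1
# b1 = np.zeros((HID_LAYER1, 1))
# b2 = np.zeros((HID_LAYER2, 1))
# b3 = np.zeros((OUTPUT_SIZE, 1))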
print(raw_x)
print(raw_y)
print(raw_x.shape)
# normalize the training dataset
train_x, max_values=data_normalize(raw_x)
print("max value", max_values)
print(train_x)
cost_history = []

# train
for i in range(num_iterations):
    # TODO 6: Call implemented full_forward_propagation.
    A3, memory = None
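    # A hedged sketch of TODO 6 (not the graded answer):
    # A3, memory = full_forward_propagation(train_x, {"W1": W1, "b1": b1,
    #                                                 "W2": W2, "b2": b2,
    #                                                 "W3": W3, "b3": b3})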
    # TODO 7: Call get_cost_value function
    cost = None
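    # A hedged sketch of TODO 7 (not the graded answer); Y_hat and Y both use the
    # column-per-example layout, hence the transpose of raw_y:
    # cost = get_cost_value(A3, raw_y.T)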
    cost_history.append(cost)
    print(cost)
    #print("A1", A1)
    #print("A2", A2)
    #print("A3", A3)
    #print("Z1", Z1)
    #print("Z2", Z2)
    #print("Z3", Z3)

    # initiation of the gradient descent algorithm
    dA_last = - (np.divide(raw_y.T, A3) - np.divide(1 - raw_y.T, 1 - A3))
    dA2_q, dW3_q, db3_q = single_layer_backward_propagation(dA_last, W3, b3, Z3, A2, activation="sigmoid")
    m = A2.shape[1]
    dZ3 = A3 - raw_y.T
    dW3 = np.dot(dZ3, A2.T) / m
    db3 = np.sum(dZ3, axis=1, keepdims=True) / m
    dA2 = np.dot(W3.T, dZ3)
    # TODO 10: Complete implementation of full backpropagation.
    dA1, dW2, db2 = None
    dA0, dW1, db1 = None
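    # A hedged sketch of TODO 10 (not the graded answer), assuming A1, Z1, A2 and Z2
    # have been unpacked from memory and that the hidden layers use relu:
    # dA1, dW2, db2 = single_layer_backward_propagation(dA2, W2, b2, Z2, A1, activation="relu")
    # dA0, dW1, db1 = single_layer_backward_propagation(dA1, W1, b1, Z1, train_x.T, activation="relu")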
#print("db1:", db1)
#print("db2:", db2)
# TODO 11: Update parameter W1, W2, W3, b1, b2, b3.
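    # A hedged sketch of TODO 11 (not the graded answer): plain gradient descent steps.
    # W3 = W3 - learning_rate * dW3
    # b3 = b3 - learning_rate * db3
    # W2 = W2 - learning_rate * dW2
    # b2 = b2 - learning_rate * db2
    # W1 = W1 - learning_rate * dW1
    # b1 = b1 - learning_rate * db1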
    param_values = {}
    param_values["W1"] = W1
    param_values["b1"] = b1
    param_values["W2"] = W2
    param_values["b2"] = b2
    param_values["W3"] = W3
    param_values["b3"] = b3
print("Z3:", Z3)
print("A3:", A3)
print("W3", W3)
print("b3", b3)
print("W2", W2)
print("b2", b2)
print("W1", W1)
print("b1", b1)
print(cost)
# TODO 12: Create some input data for prediction.
x_prediction=np.array([[30, 40, 90]])
x_prediction_norm=data_normalize_prediction(x_prediction, max_values)
print("x_pred_norm", x_prediction_norm)
# TODO 12: Make prediction.
A_prediction, memory=None
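# A hedged sketch of the prediction call in TODO 12 (not the graded answer):
# A_prediction, memory = full_forward_propagation(x_prediction_norm, param_values)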
print("A prediction", A_prediction)
plt.plot(cost_history)
plt.show()
import tensorflow as tf
# Export saved model
export_dir = 'mymodel'
# Load and prepare the MNIST dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Normalize the dataset to the [0, 1] range
x_train = x_train / 255.0
x_test = x_test / 255.0
# Build sequential model by stacking layers, choose optimizer and loss function
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(80, activation='elu'))
model.add(tf.keras.layers.Dense(60, activation='elu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10))
model.summary()
predictions = model(x_train[:1]).numpy()
predictions_prob = tf.nn.softmax(predictions).numpy()
print ('Probabilities for each class: ' + str(predictions_prob))
# Take a vector of logits and True index and return scalar loss for each example
# This loss is equal to the negative log probability of the true class: It is zero if the model is sure of the correct class.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_initial = loss_fn(y_train[:1], predictions).numpy()
print('Untrained model initial loss: ' + str(loss_initial))
# Train model
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
# Adjust model parameters to minimize the loss and train it
model.fit(x_train, y_train, epochs=5)
# Evaluate model performance
model.evaluate(x_test, y_test, verbose=2)
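
# Note: export_dir is defined above but never used in this script. A hedged sketch of
# exporting the trained network with the standard Keras save API:
# model.save(export_dir)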