Commit e7347cf7 by Nazrul_being

Added HW1

parent a4da20db
# Build and run the HW1 example.
#   make        - compile $(BIN) from the sources listed in $(SRC)
#   make run    - build if needed, then execute the binary
#   make clean  - delete the binary
BIN = hw1
CC = g++
SRC = main.cpp ../lib/src/NeuralNetwork.cpp ../lib/src/Dataset.cpp

# These targets are commands, not files: declaring them phony keeps make from
# skipping them when a file named "all", "run" or "clean" happens to exist.
.PHONY: all run clean

all: $(BIN)

# Recipe lines must start with a TAB character.
$(BIN): $(SRC)
	$(CC) -o $(BIN) $(SRC)

run: $(BIN)
	./$(BIN)

clean:
	rm -f $(BIN)
\ No newline at end of file
/*Example code to work with the dataset*/
#include "../lib/includes/Dataset.h"
#include <iostream>
// Demonstrates the Dataset API end to end: load a CSV file, choose the input
// and output columns, split into train/test sets and print a few rows of each.
// Returns 0 on success and 1 if any step throws (for example when the CSV
// file cannot be opened), after reporting the error on stderr.
int main() {
    // Prints a caption followed by one row per line, every cell followed by a space.
    const auto printSample = [](const char* caption,
                                const std::vector<std::vector<std::string>>& rows) {
        std::cout << caption;
        for (const auto& row : rows) {
            for (const auto& col : row) {
                std::cout << col << " ";
            }
            std::cout << "\n";
        }
    };

    try {
        Dataset dataset;

        // Load the dataset
        dataset.processDataset("titanic_dataset.csv");

        // Set input columns (Pclass, Sex, Age, Fare; 1-based indexing)
        dataset.inputColumns({3, 5, 6, 10});

        // Set output column (Survived; 1-based indexing)
        dataset.outputColumn(2);

        // Choose the seed used when the training split is shuffled;
        // this call only stores the seed, the shuffle itself happens during the split.
        dataset.setShuffleSeed(42);

        // Divide into train and test sets
        dataset.divideTrainAndTestData();

        // Get and print a sample of training data
        printSample("Training Data Sample:\n", dataset.getTrainDataSample(5));

        // Get and print a sample of testing data
        printSample("Testing Data Sample:\n", dataset.getTestDataSample(3));
    } catch (const std::exception& error) {
        // Without this handler a missing CSV file would terminate the program
        // through an uncaught exception.
        std::cerr << "Error: " << error.what() << "\n";
        return 1;
    }
    return 0;
}
......@@ -20,7 +20,7 @@ int main() {
};
std::vector<double> hiddenBiases = {0.0, 0.0, 0.0};
std::vector<std::vector<double>> hiddenToOutputWeights = {
{0.3, -0.6, 0.9}
{0.3, -0.6, 0.9, 0.1}
};
std::vector<double> outputBiases = {0.0};
......
......@@ -14,7 +14,8 @@ std::vector<double> hiddenLayerBias = {0, 0, 0}; // Initialize biases for the h
std::vector<double> hiddenLayerWeightedSum(NUM_OF_HIDDEN_NODES); // Weighted sum (z1) before applying activation function
// Weights from input layer to hidden layer
std::vector<std::vector<double>> inputToHiddenWeights = {
std::vector<std::vector<double>> inputToHiddenWeights =
{
{0.25, 0.5, 0.05}, // Weights for hidden neuron 1
{0.8, 0.82, 0.3}, // Weights for hidden neuron 2
{0.5, 0.45, 0.19} // Weights for hidden neuron 3
......@@ -25,7 +26,8 @@ std::vector<double> outputLayerBias = {0}; // Initialize bias for the output ne
std::vector<double> outputLayerWeightedSum(NUM_OF_OUTPUT_NODES); // Weighted sum (z2) before applying activation function
// Weights from hidden layer to output layer
std::vector<std::vector<double>> hiddenToOutputWeights = {
std::vector<std::vector<double>> hiddenToOutputWeights =
{
{0.48, 0.73, 0.03} // Weights for the output neuron
};
......@@ -37,7 +39,8 @@ std::vector<std::vector<double>> normalizedInput(2, std::vector<double>(NUM_OF_F
std::vector<std::vector<double>> expectedOutput = {{1}}; // Expected output (labels) for each training example
// Task 1: Perform a forward pass through the network
void task1() {
void task1()
{
NeuralNetwork nn;
// Raw input features before normalization
......@@ -86,7 +89,8 @@ void task1() {
}
// Task 2: Save and load the network's state
void task2() {
void task2()
{
NeuralNetwork nn;
const std::string filename = "network_save.txt";
......@@ -115,6 +119,7 @@ void task2() {
// Program entry point: runs task1, prints a separating newline, then runs task2.
int main()
{
    task1();
    std::cout << '\n'; // blank separator between the output of the two tasks
    task2();

    return 0;
}
#ifndef DATASET_H
#define DATASET_H
#include <vector>
#include <string>
#include <utility>
// Loads rows of text cells from a CSV file, lets the caller pick which columns
// act as inputs and which one is the output, and splits the rows into a
// training part and a testing part.
// Column numbers passed to the public interface are 1-based.
class Dataset {
public:
    // Load the dataset from a CSV file
    void processDataset(const std::string& filename);

    // Select input columns (1-based column numbers)
    void inputColumns(const std::vector<int>& columns);

    // Select output column (1-based column number)
    void outputColumn(int column);

    // Divide data into training and testing sets (80/20)
    void divideTrainAndTestData();

    // Get a sample from the training data, reduced to the selected columns
    std::vector<std::vector<std::string>> getTrainDataSample(size_t sampleSize);

    // Get a sample from the testing data, reduced to the selected columns
    std::vector<std::vector<std::string>> getTestDataSample(size_t sampleSize);

    // Set a custom seed for shuffling
    void setShuffleSeed(unsigned int seed);

private:
    std::vector<std::vector<std::string>> data;      // every loaded row, one string per cell
    std::vector<std::vector<std::string>> trainData; // training part of `data`
    std::vector<std::vector<std::string>> testData;  // testing part of `data`

    std::vector<int> inputCols; // selected input columns, stored 0-based

    // Selected output column, stored 0-based. Starts at -1 ("not selected") so
    // the member is never read while uninitialized when outputColumn() has not
    // been called yet.
    int outputCol = -1;

    // Seed for random shuffling
    unsigned int shuffleSeed = 42;

    // Helper functions
    std::vector<std::string> split(const std::string& line, char delimiter);
    void shuffleTrainData(); // Shuffle only the training data

    // Filter row to include only selected input and output columns
    std::vector<std::string> filterRow(const std::vector<std::string>& row);

    // Helper function to get a sample from the data
    std::vector<std::vector<std::string>> getSample(const std::vector<std::vector<std::string>>& sourceData, size_t sampleSize);
};
#endif // DATASET_H
......@@ -39,6 +39,16 @@ public:
std::vector<std::vector<double>>& hiddenToOutputWeights, std::vector<double>& outputBiases,
double learningRate, int epochs);
// Backpropagation learning function for two hidden layers and batch learning.
// `input` and `expectedOutput` are read-only. Every weight, bias and gradient
// argument is passed by non-const reference, so the callee is able to modify
// the caller's objects in place; `learningRate` is passed by value.
// NOTE(review): presumably the *Gradients / outputBiasGradient arguments
// accumulate per-sample gradients across a batch - confirm against the
// implementation.
void backpropagation2layer(const std::vector<double>& input, const std::vector<double>& expectedOutput,
std::vector<std::vector<double>>& inputToHidden1Weights, std::vector<double>& hidden1Biases,
std::vector<std::vector<double>>& hidden1ToHidden2Weights, std::vector<double>& hidden2Biases,
std::vector<double>& hidden2ToOutputWeights, double& outputBias,
double learningRate,
std::vector<std::vector<double>>& inputToHidden1WeightGradients, std::vector<double>& hidden1BiasGradients,
std::vector<std::vector<double>>& hidden1ToHidden2WeightGradients, std::vector<double>& hidden2BiasGradients,
std::vector<double>& hidden2ToOutputWeightGradients, double& outputBiasGradient);
// Activation functions (ReLU and Sigmoid)
double relu(double x);
......
#include "../includes/Dataset.h"
#include <algorithm>
#include <cctype> // For isspace
#include <fstream>
#include <iterator>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
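// File-scope seed for the random number generator.
// NOTE(review): inside Dataset's member functions the name `shuffleSeed` resolves to the
// class member of the same name, so this global looks unused in this file - confirm and remove.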
unsigned int shuffleSeed = 449; // Default seed value for reproducibility
// Helper function to strip whitespace from the beginning and end of a string.
// Besides plain spaces this also removes tabs and line-ending characters, so a
// '\r' left at the end of a line read from a CRLF file does not end up inside
// the last field of a row.
// Returns an empty string when the input is empty or consists only of whitespace.
std::string trim(const std::string& str) {
    static const char* const kWhitespace = " \t\n\r\f\v";

    const std::string::size_type first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        return ""; // nothing but whitespace (or nothing at all)
    }

    // A non-whitespace character exists, so this search cannot fail.
    const std::string::size_type last = str.find_last_not_of(kWhitespace);
    return str.substr(first, last - first + 1);
}
// Helper function to split one CSV line into fields.
// - `delimiter` separates fields unless it appears inside a double-quoted section.
// - The quote characters that open and close a quoted section are not copied
//   into the field.
// - Inside a quoted section two consecutive quotes ("") are an escaped quote
//   and produce a single literal '"' in the field.
// - Every field is passed through trim() before it is stored.
// Always returns at least one field (an empty line yields one empty field).
std::vector<std::string> Dataset::split(const std::string& line, char delimiter) {
    std::vector<std::string> tokens;
    std::string token;
    bool inQuotes = false;

    for (std::string::size_type i = 0; i < line.size(); ++i) {
        const char ch = line[i];
        if (ch == '"') {
            if (inQuotes && i + 1 < line.size() && line[i + 1] == '"') {
                token += '"'; // escaped quote inside a quoted section
                ++i;          // consume the second quote of the pair
            } else {
                inQuotes = !inQuotes; // opening or closing quote: not part of the field
            }
        } else if (ch == delimiter && !inQuotes) {
            tokens.push_back(trim(token)); // Trim spaces before adding
            token.clear();
        } else {
            token += ch;
        }
    }

    // Add the last token
    tokens.push_back(trim(token));
    return tokens;
}
// Load the dataset from a CSV file.
// The first line of the file is treated as a header and discarded. Every other
// line that contains something besides whitespace is split on ',' and appended
// to `data`; rows already stored by an earlier call are kept.
// Throws std::runtime_error when the file cannot be opened.
void Dataset::processDataset(const std::string& filename) {
    std::ifstream file(filename);
    if (!file.is_open()) {
        throw std::runtime_error("Could not open file: " + filename);
    }

    std::string line;
    std::getline(file, line); // Skip header row (no effect on an empty file)

    while (std::getline(file, line)) {
        // Blank lines (e.g. a trailing newline at the end of the file) would
        // otherwise be stored as bogus rows with a single empty cell.
        if (line.find_first_not_of(" \t\r") == std::string::npos) {
            continue;
        }
        data.push_back(split(line, ',')); // Parse each line
    }
    // The stream closes itself when `file` goes out of scope.
}
// Replace the current input-column selection.
// Callers pass 1-based column numbers; each one is stored as a 0-based index.
void Dataset::inputColumns(const std::vector<int>& columns) {
    inputCols.clear();
    inputCols.reserve(columns.size());
    for (const int oneBased : columns) {
        inputCols.push_back(oneBased - 1);
    }
}
// Choose the output column. The caller's 1-based column number is stored as a
// 0-based index.
void Dataset::outputColumn(int column) {
    const int zeroBased = column - 1;
    outputCol = zeroBased;
}
// Reduce a row to the selected columns: the input columns in selection order,
// followed by the output column.
// Returns an empty vector as soon as any selected column is out of range for
// this row or holds an empty cell.
std::vector<std::string> Dataset::filterRow(const std::vector<std::string>& row) {
    // A cell is usable when its index lies inside the row and its text is not empty.
    // The cast spells out the int -> size_t conversion the comparison performs anyway.
    const auto usable = [&row](int index) {
        return static_cast<size_t>(index) < row.size() && !row[index].empty();
    };

    std::vector<std::string> selected;
    selected.reserve(inputCols.size() + 1);

    // Input columns first
    for (const int index : inputCols) {
        if (!usable(index)) {
            return {};
        }
        selected.push_back(row[index]);
    }

    // Output column last
    if (!usable(outputCol)) {
        return {};
    }
    selected.push_back(row[outputCol]);

    return selected;
}
// Split `data` into a training part (the first 80% of the rows, rounded down)
// and a testing part (the remaining rows), then shuffle the training part.
// The testing part keeps the order it has in `data`.
void Dataset::divideTrainAndTestData() {
    const size_t trainSize = static_cast<size_t>(data.size() * 0.8);
    const auto boundary = data.begin() + trainSize; // first row of the testing part

    trainData.assign(data.begin(), boundary);
    testData.assign(boundary, data.end());

    shuffleTrainData();
}
// Reorder `trainData` with a Mersenne Twister engine seeded from `shuffleSeed`,
// so the same seed is used on every call.
void Dataset::shuffleTrainData() {
    std::mt19937 engine(shuffleSeed);
    std::shuffle(trainData.begin(), trainData.end(), engine);
}
// Set a custom seed for shuffling
void Dataset::setShuffleSeed(unsigned int seed) {
shuffleSeed = seed;
}
// Collect a sample of column-filtered rows from the training data.
std::vector<std::vector<std::string>> Dataset::getTrainDataSample(size_t sampleSize) {
    const auto& source = trainData;
    return getSample(source, sampleSize);
}
// Collect a sample of column-filtered rows from the testing data.
std::vector<std::vector<std::string>> Dataset::getTestDataSample(size_t sampleSize) {
    const auto& source = testData;
    return getSample(source, sampleSize);
}
// Helper function to get a sample from the data.
// Walks `sourceData` in order, reduces each row with filterRow() and keeps the
// rows for which filterRow() returns a non-empty result, stopping once
// `sampleSize` rows have been collected.
// Returns at most `sampleSize` rows; fewer when the source runs out of usable
// rows, and none when `sampleSize` is 0.
std::vector<std::vector<std::string>> Dataset::getSample(const std::vector<std::vector<std::string>>& sourceData, size_t sampleSize) {
    std::vector<std::vector<std::string>> sample;
    // Never reserve more than the source can supply, so a very large
    // sampleSize ("give me everything") does not trigger a huge allocation.
    sample.reserve(std::min(sampleSize, sourceData.size()));

    for (const auto& row : sourceData) {
        // Check before adding, so a request for 0 rows really yields 0 rows.
        if (sample.size() >= sampleSize) {
            break;
        }
        auto filteredRow = filterRow(row);
        if (!filteredRow.empty()) {
            sample.push_back(std::move(filteredRow));
        }
    }
    return sample;
}
......@@ -78,6 +78,18 @@ void NeuralNetwork::backpropagation(const std::vector<double>& input, const std:
return;
}
// Placeholder for backpropagation through two hidden layers: the body returns
// immediately without reading or modifying any of its arguments.
// NOTE(review): unlike the neighbouring NeuralNetwork:: definitions, this one
// has no `NeuralNetwork::` qualifier, so it defines a free function rather than
// the member function declared in the header - presumably unintended; confirm
// and qualify the name.
void backpropagation2layer(const std::vector<double>& input, const std::vector<double>& expectedOutput,
std::vector<std::vector<double>>& inputToHidden1Weights, std::vector<double>& hidden1Biases,
std::vector<std::vector<double>>& hidden1ToHidden2Weights, std::vector<double>& hidden2Biases,
std::vector<double>& hidden2ToOutputWeights, double& outputBias,
double learningRate,
std::vector<std::vector<double>>& inputToHidden1WeightGradients, std::vector<double>& hidden1BiasGradients,
std::vector<std::vector<double>>& hidden1ToHidden2WeightGradients, std::vector<double>& hidden2BiasGradients,
std::vector<double>& hidden2ToOutputWeightGradients, double& outputBiasGradient)
{
// Stub: no computation yet.
return;
}
void NeuralNetwork::vectorReLU(std::vector<double>& inputVector, std::vector<double>& outputVector)
{
return;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment