My Neural Network is only learning some data sets


I have created the following NN, which is supposed to learn based on backpropagation.

I pieced it together from a lot of reading and a bunch of different tutorials.

To test it, I have tried giving it the XOR problem. Each data set has 2 inputs and 2 outputs. Both inputs are either a 1 or a 0, and the two outputs should indicate whether a 0 should be output (the first output) or a 1 should be output (the second output).

This is what happens when I give it the following data:

___________________________________________________________________________
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|-------------------------------------------------------------------------|
|    0    |    1    |     1      |     0      |    0.49     |    0.50     |
|    1    |    0    |     1      |     0      |    0.98     |    0.01     |
|    1    |    1    |     0      |     1      |    0.01     |    0.98     |
|    0    |    0    |     0      |     1      |    0.49     |    0.50     |
---------------------------------------------------------------------------

Hopefully it is clear from the above that for two of the given problems it sort of works; assuming some margin of error, getting an answer within 0.01 is fine.

But for the other two answers it is nowhere near. Sure, a step function would produce the correct result, but it is basically saying there is a 50/50 split.

This is with 100,000 epochs and a learning rate of 0.03, and what you see above is the actual training data.

If I raise the learning rate to 0.9, the results are different, but they also leave me with questions:

___________________________________________________________________________
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|-------------------------------------------------------------------------|
|    0    |    1    |     1      |     0      |    0.99     |    0.00     |
|    1    |    0    |     1      |     0      |    0.99     |    0.00     |
|    1    |    1    |     0      |     1      |    0.49     |    0.99     |
|    0    |    0    |     0      |     1      |    0.00     |    0.99     |
---------------------------------------------------------------------------

Much better; but there is still a strange output for the 1,1 input.

My code is fairly short. Here is the complete code:

#include <iostream>
#include <array>
#include <cmath>
#include <random>
#include <vector>

class RandomGenerator
{
public:
    RandomGenerator(const double min, const double max)
    :
        m_ran(),
        m_twister(m_ran()),
        m_distrib(min,max)
    {
    }
    double operator()(void) { return m_distrib(m_twister); }
private:
    std::random_device                      m_ran;
    std::mt19937_64                         m_twister;
    std::uniform_real_distribution<double>  m_distrib;
} randGen(-2,2);

double sigmoid(const double x)
{
    return 1.0 / (1.0 + std::exp(-x));
}

double softplus(const double x)
{
    return std::log(1.0 + std::exp(x));
}

double step(const double x)
{
    return x > 0 ? 1 : 0;
}

template<int NumInputs, double(*ActivationFunction)(const double)>
class Neuron
{
public:
    void SetInput(const std::size_t index, const double value)
    {
        m_inputsAndWeights[index].value = value;
    }
    double GetInput(const std::size_t index) const { return m_inputsAndWeights[index].value; }
    void SetWeight(const std::size_t index, const double weight)
    {
        m_inputsAndWeights[index].weight = weight;
    }
    double GetWeight(const std::size_t index) const { return m_inputsAndWeights[index].weight; }
    void SetBiasWeight(const double weight) { m_biasWeight = weight; }
    double GetBiasWeight() const { return m_biasWeight; }
    // Weighted sum of the inputs plus the bias, passed through the activation function.
    double GetOutput() const
    {
        double output = 0;
        for(const auto& p : m_inputsAndWeights)
            output += p.value * p.weight;
        output += 1.0 * m_biasWeight;
        return ActivationFunction(output);
    }
private:
    struct DataPair
    {
        double value;
        double weight;
    };
    std::array<DataPair,NumInputs> m_inputsAndWeights;
    double m_biasWeight;
};

template<std::size_t NumInputs, std::size_t NumOutputs>
class NeuralNetwork
{
public:
    static constexpr std::size_t NumHidden() { return (NumInputs+NumOutputs) / 2; }
    void SetInputs(std::array<double,NumInputs> inputData)
    {
        for(auto& i : m_hiddenNeurons)
        {
            for(std::size_t index = 0; index < inputData.size(); ++index)
                i.SetInput(index,inputData[index]);
        }
    }
    std::array<double,NumOutputs> GetOutputs() const
    {
        std::array<double,NumOutputs> outputs;
        for(std::size_t i = 0; i < NumOutputs; ++i)
        {
            outputs[i] = m_outputNeurons[i].GetOutput();
        }
        return outputs;
    }
    // Feed the inputs through the hidden layer into the output layer.
    void PassForward(std::array<double,NumInputs> inputData)
    {
        SetInputs(inputData);
        for(std::size_t i = 0; i < NumHidden(); ++i)
        {
            for(auto& o : m_outputNeurons)
            {
                o.SetInput(i,m_hiddenNeurons[i].GetOutput());
            }
        }
    }
    void Train(std::vector<std::array<double,NumInputs>> trainingData,
               std::vector<std::array<double,NumOutputs>> targetData,
               double learningRate, std::size_t numEpochs)
    {
        // Randomly initialise all weights and bias weights.
        for(auto& h : m_hiddenNeurons)
        {
            for(std::size_t i = 0; i < NumInputs; ++i)
                h.SetWeight(i,randGen());
            h.SetBiasWeight(randGen());
        }
        for(auto& o : m_outputNeurons)
        {
            for(std::size_t h = 0; h < NumHidden(); ++h)
                o.SetWeight(h,randGen());
            o.SetBiasWeight(randGen());
        }
        for(std::size_t e = 0; e < numEpochs; ++e)
        {
            for(std::size_t dataIndex = 0; dataIndex < trainingData.size(); ++dataIndex)
            {
                PassForward(trainingData[dataIndex]);
                std::array<double,NumHidden()+1> deltaHidden;
                std::array<double,NumOutputs> deltaOutput;
                // Output-layer deltas: sigmoid derivative times the output error.
                for(std::size_t i = 0; i < NumOutputs; ++i)
                {
                    auto output = m_outputNeurons[i].GetOutput();
                    deltaOutput[i] = output * (1.0 - output) * (targetData[dataIndex][i] - output);
                }
                // Hidden-layer deltas: backpropagate the output deltas through the output weights.
                for(std::size_t i = 0; i < NumHidden(); ++i)
                {
                    double error = 0;
                    for(std::size_t j = 0; j < NumOutputs; ++j)
                    {
                        error += m_outputNeurons[j].GetWeight(i) * deltaOutput[j];
                    }
                    auto output = m_hiddenNeurons[i].GetOutput();
                    deltaHidden[i] = output * (1.0 - output) * error;
                }
                // Update output-layer weights and bias weights.
                for(std::size_t i = 0; i < NumOutputs; ++i)
                {
                    for(std::size_t j = 0; j < NumHidden(); ++j)
                    {
                        auto currentWeight = m_outputNeurons[i].GetWeight(j);
                        m_outputNeurons[i].SetWeight(j,currentWeight + learningRate * deltaOutput[i] * m_hiddenNeurons[j].GetOutput());
                    }
                    auto currentWeight = m_outputNeurons[i].GetBiasWeight();
                    m_outputNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaOutput[i] * (1.0*currentWeight));
                }
                // Update hidden-layer weights and bias weights.
                for(std::size_t i = 0; i < NumHidden(); ++i)
                {
                    for(std::size_t j = 0; j < NumInputs; ++j)
                    {
                        auto currentWeight = m_hiddenNeurons[i].GetWeight(j);
                        m_hiddenNeurons[i].SetWeight(j,currentWeight + learningRate * deltaHidden[i] * m_hiddenNeurons[i].GetInput(j));
                    }
                    auto currentWeight = m_hiddenNeurons[i].GetBiasWeight();
                    m_hiddenNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaHidden[i] * (1.0*currentWeight));
                }
            }
        }
    }
private:
    std::array<Neuron<NumInputs,sigmoid>,NumHidden()> m_hiddenNeurons;
    std::array<Neuron<NumHidden(),sigmoid>,NumOutputs> m_outputNeurons;
};

int main()
{
    NeuralNetwork<2,2> NN;
    std::vector<std::array<double,2>> trainingData = {{{0,1},{1,0},{1,1},{0,0}}};
    std::vector<std::array<double,2>> targetData = {{{1,0},{1,0},{0,1},{0,1}}};
    NN.Train(trainingData,targetData,0.03,100000);
    for(std::size_t i = 0; i < trainingData.size(); ++i)
    {
        NN.PassForward(trainingData[i]);
        auto outputs = NN.GetOutputs();
        for(std::size_t o = 0; o < outputs.size(); ++o)
        {
            std::cout << "Out " << o << ":\t" << outputs[o] << std::endl;
        }
    }
    return 0;
}

I did the same thing a few days ago, and I can tell you that if you hit an unlucky weight initialization, 100,000 backpropagation iterations are not enough. Don't initialize your weights with just any random values: the sigmoid saturates easily for large weights, and on the other hand weights near zero don't help either. I initialized my weights at ±(0.3, 0.7) and convergence improved significantly.
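For reference, here is a minimal sketch of that kind of initialization, assuming "±(0.3, 0.7)" means a magnitude drawn uniformly from [0.3, 0.7] with a randomly chosen sign; it is written as a drop-in replacement for the randGen(-2,2) global in the question's code, and the class name MagnitudeSignGenerator is mine:

#include <random>

// A sketch of the initialization described above: magnitude uniform in
// [minMag, maxMag], sign chosen at random (assumed reading of "±(0.3, 0.7)").
class MagnitudeSignGenerator
{
public:
    MagnitudeSignGenerator(const double minMag, const double maxMag)
    :
        m_twister(std::random_device{}()),
        m_magnitude(minMag, maxMag),
        m_sign(0, 1)
    {
    }
    // Returns a weight whose magnitude lies in [minMag, maxMag],
    // negated with probability 1/2.
    double operator()()
    {
        const double mag = m_magnitude(m_twister);
        return m_sign(m_twister) == 0 ? -mag : mag;
    }
private:
    std::mt19937_64                        m_twister;
    std::uniform_real_distribution<double> m_magnitude;
    std::uniform_int_distribution<int>     m_sign;
} randGen(0.3, 0.7);

This keeps every initial weight out of the flat, saturated tails of the sigmoid (large magnitudes) while also avoiding the near-zero region, where the backpropagated error through the output weights is close to zero and the hidden layer barely updates.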