# %% In[2]: raw measurements, reshaped to one sample per row
t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
# unsqueeze(1) turns the 11-element vectors into (11, 1) columns, the
# (batch, features) layout that nn.Linear(1, 1) is applied to below.
t_c = torch.tensor(t_c).unsqueeze(1)
t_u = torch.tensor(t_u).unsqueeze(1)

t_u.shape

# %% In[3]: shuffled train/validation split (20% held out)
# Seed the global RNG so the permutation -- and the parameter initialisation
# of the models created afterwards -- is the same on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)
# Split on a positive index: slicing with -n_val inverts the split when
# n_val == 0 (x[:-0] is empty and x[-0:] is everything).
n_train = n_samples - n_val

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:n_train]
val_indices = shuffled_indices[n_train:]

train_indices, val_indices

# %% In[4]: gather both subsets and build the scaled inputs
t_u_train = t_u[train_indices]
t_c_train = t_c[train_indices]

t_u_val = t_u[val_indices]
t_c_val = t_c[val_indices]

# The 0.1-scaled inputs are what every model in this notebook is fed.
t_un_train = 0.1 * t_u_train
t_un_val = 0.1 * t_u_val

# %% In[5]: a single linear unit applied to the validation batch
import torch.nn as nn

linear_model = nn.Linear(1, 1)  # one input feature, one output feature
linear_model(t_un_val)

# %% In[6]
linear_model.weight

# %% In[7]
linear_model.bias

# %% In[8]: calling the module on a single 1-element input
x = torch.ones(1)
linear_model(x)

# %% In[9]: calling the module on a batch of 10 rows
x = torch.ones(10, 1)
linear_model(x)

# %% In[10]: fresh model plus an optimizer over its parameters
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(
    linear_model.parameters(),  # the tensors the optimizer will update
    lr=1e-2)

# %% In[11]
linear_model.parameters()

# %% In[12]
list(linear_model.parameters())


# %% In[13]
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val,
                  t_c_train, t_c_val, log_every=1000):
    """Train ``model`` for ``n_epochs`` epochs, printing both losses as it goes.

    Each epoch runs the whole training input through the model, takes one
    optimizer step on the training loss, and evaluates the validation loss
    for reporting only.

    Args:
        n_epochs: number of epochs; epochs are numbered from 1.
        optimizer: optimizer built over ``model``'s parameters; its
            ``zero_grad()`` and ``step()`` are called once per epoch.
        model: callable mapping an input tensor to predictions.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a
            scalar tensor.
        t_u_train: input passed to ``model`` for the training step. It is
            used exactly as given, so callers must pass already-scaled data.
        t_u_val: input passed to ``model`` for the validation loss.
        t_c_train: targets paired with ``t_u_train``.
        t_c_val: targets paired with ``t_u_val``.
        log_every: report on epoch 1 and on every epoch divisible by this
            positive integer.

    Returns:
        None. ``model`` is updated in place through ``optimizer``.
    """
    for epoch in range(1, n_epochs + 1):
        # Use the arguments, not notebook globals, so the loop trains on
        # whatever data the caller actually supplies.
        t_p_train = model(t_u_train)
        loss_train = loss_fn(t_p_train, t_c_train)

        # The validation loss is only printed, never back-propagated, so
        # there is no need to record an autograd graph for it.
        with torch.no_grad():
            t_p_val = model(t_u_val)
            loss_val = loss_fn(t_p_val, t_c_val)

        optimizer.zero_grad()  # drop gradients left over from the last epoch
        loss_train.backward()
        optimizer.step()

        if epoch == 1 or epoch % log_every == 0:
            print('Epoch {}, Training loss {}, Validation loss {}'.format(
                epoch, float(loss_train), float(loss_val)))


# %% In[14]
def loss_fn(t_p, t_c):
    """Return the mean of the element-wise squared differences ``t_p - t_c``."""
    squared_diffs = (t_p - t_c)**2
    return squared_diffs.mean()


linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=loss_fn,
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val)

print()
print(linear_model.weight)
print(linear_model.bias)
# %% In[15]: repeat the linear fit, swapping the hand-written loss for nn.MSELoss
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=nn.MSELoss(),  # built-in loss module in place of loss_fn
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

print()
print(linear_model.weight)
print(linear_model.bias)

# %% In[16]: two linear layers with a tanh in between
seq_model = nn.Sequential(
    nn.Linear(1, 13),   # 1 input feature -> 13 hidden features
    nn.Tanh(),
    nn.Linear(13, 1),   # 13 hidden features -> 1 output
)
seq_model

# %% In[17]: shapes of every parameter tensor, in registration order
[tensor.shape for tensor in seq_model.parameters()]

# %% In[18]: the same shapes, keyed by parameter name
for name, param in seq_model.named_parameters():
    print(name, param.shape)

# %% In[19]: rebuild the network with named submodules (8 hidden features)
from collections import OrderedDict

seq_model = nn.Sequential(
    OrderedDict(
        [
            ('hidden_linear', nn.Linear(1, 8)),
            ('hidden_activation', nn.Tanh()),
            ('output_linear', nn.Linear(8, 1)),
        ]
    )
)

seq_model

# %% In[20]: parameter names now carry the submodule names given above
for name, param in seq_model.named_parameters():
    print(name, param.shape)

# %% In[21]: named submodules are reachable as attributes
seq_model.output_linear.bias
# %% In[22]: train the 8-unit network with a smaller learning rate
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=seq_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

# Predictions on the held-out inputs next to their targets, then the gradient
# left on the first layer's weights by the last backward pass.
print('output', seq_model(t_un_val))
print('answer', t_c_val)
print('hidden', seq_model.hidden_linear.weight.grad)

# %% In[23]: data points vs. the trained network
from matplotlib import pyplot as plt

# Inputs 20.0, 21.0, ..., 89.0 as a column; scaled by 0.1 before being fed
# to the model, matching how the training inputs were scaled.
t_range = torch.arange(20.0, 90.0).unsqueeze(1)

plt.plot(t_u.numpy(), t_c.numpy(), 'o')  # measured points
plt.plot(t_range.numpy(), seq_model(0.1 * t_range).detach().numpy(), 'c-')  # model over the range
plt.plot(t_u.numpy(), seq_model(0.1 * t_u).detach().numpy(), 'kx')  # model at the measured inputs

# %% In[30]: exercise -- wider hidden layer, lower learning rate
# Exercises here!

neuron_count = 20

seq_model = nn.Sequential(
    OrderedDict(
        [
            ('hidden_linear', nn.Linear(1, neuron_count)),
            ('hidden_activation', nn.Tanh()),
            ('output_linear', nn.Linear(neuron_count, 1)),
        ]
    )
)

optimizer = optim.SGD(seq_model.parameters(), lr=1e-4)

training_loop(
    n_epochs=5000,
    optimizer=optimizer,
    model=seq_model,
    loss_fn=nn.MSELoss(),
    t_u_train=t_un_train,
    t_u_val=t_un_val,
    t_c_train=t_c_train,
    t_c_val=t_c_val,
)

from matplotlib import pyplot as plt

t_range = torch.arange(20.0, 90.0).unsqueeze(1)

plt.plot(t_u.numpy(), t_c.numpy(), 'o')  # measured points
plt.plot(t_range.numpy(), seq_model(0.1 * t_range).detach().numpy(), 'c-')  # model over the range
plt.plot(t_u.numpy(), seq_model(0.1 * t_u).detach().numpy(), 'kx')  # model at the measured inputs