{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "\n", "torch.manual_seed(1024)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# 定义线性模型和sigmoid函数\n", "\n", "class Linear:\n", " # input: (B, in_features)\n", " # output: (B, out_features)\n", " \n", " def __init__(self, in_features, out_features, bias=True):\n", " # 对于模型参数的初始化,故意没有做优化\n", " self.weight = torch.randn(in_features, out_features, requires_grad=True) # (in_features, out_features)\n", " if bias:\n", " self.bias = torch.randn(out_features, requires_grad=True) # ( out_features)\n", " else:\n", " self.bias = None\n", " \n", " def __call__(self, x):\n", " # x: (B, in_features)\n", " # self.weight: (in_features, out_features)\n", " self.out = x @ self.weight # (B, out_features)\n", " if self.bias is not None:\n", " self.out += self.bias\n", " return self.out\n", " \n", " def parameters(self):\n", " # 返回模型参数\n", " if self.bias is not None:\n", " return [self.weight, self.bias]\n", " return [self.weight]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([5, 4])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "l = Linear(3, 4)\n", "x = torch.randn(5, 3)\n", "l(x).shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[tensor([[ 0.6459, -0.0353, 0.5852, 0.5732],\n", " [-1.0110, 0.2098, 0.4153, 0.0819],\n", " [-0.3151, -0.5068, 0.3941, 0.3839]], requires_grad=True),\n", " tensor([ 0.5583, -1.1253, 1.5603, 1.9050], requires_grad=True)]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "l.parameters()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "class Sigmoid:\n", " \n", " def __call__(self, x):\n", " self.out = torch.sigmoid(x)\n", " return self.out\n", " \n", " def parameters(self):\n", " return []" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([3, 2])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s = Sigmoid()\n", "x = torch.randn(3, 2)\n", "s(x).shape" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "class Perceptron:\n", " \n", " def __init__(self, in_features):\n", " self.ln = Linear(in_features, 1)\n", " self.f = Sigmoid()\n", " \n", " def __call__(self, x):\n", " # x: (B, in_features)\n", " self.out = self.f(self.ln(x)) # (B, 1)\n", " return self.out\n", " \n", " def parameters(self):\n", " return self.ln.parameters() + self.f.parameters()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([5, 1])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p = Perceptron(3)\n", "x = torch.randn(5, 3)\n", "p(x).shape" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "class LogitRegression:\n", " # input: (B, in_features)\n", " # output: (B, 2)\n", " \n", " def __init__(self, in_features):\n", " self.pos = Linear(in_features, 1)\n", " self.neg = Linear(in_features, 1)\n", " \n", " def __call__(self, x):\n", " # x: (B, in_features)\n", " self.out = torch.concat((self.pos(x), self.neg(x)), dim=-1) # (B, 2)\n", " return self.out\n", " \n", " def parameters(self):\n", " return self.pos.parameters() + self.neg.parameters()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([5, 2])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lr = LogitRegression(3)\n", "x = torch.randn(5, 3)\n", "lr(x).shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-0.4861, 0.8908],\n", " [-1.2695, -0.6651],\n", " [ 1.6948, 1.6572],\n", " [-0.6641, -0.0216],\n", " [-0.9517, 0.0869]], grad_fn=)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "logits = lr(x)\n", "logits" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-0.4861],\n", " [-1.2695],\n", " [ 1.6948],\n", " [-0.6641],\n", " [-0.9517]], grad_fn=)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lr.pos(x)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import torch.nn.functional as F\n", "\n", "probs = F.softmax(logits, dim=-1)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([[0.2015, 0.7985],\n", " [0.3533, 0.6467],\n", " [0.5094, 0.4906],\n", " [0.3447, 0.6553],\n", " [0.2614, 0.7386]], grad_fn=)\n", "tensor([1, 1, 0, 1, 1])\n" ] } ], "source": [ "pred = torch.argmax(probs, dim=-1)\n", "print(probs)\n", "print(pred)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(0.4122, grad_fn=)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loss = F.cross_entropy(logits, pred)\n", "loss" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1.6020, 0.2250],\n", " [1.0403, 0.4359],\n", " [0.6745, 0.7121],\n", " [1.0651, 0.4226],\n", " [1.3416, 0.3030]], grad_fn=)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "-probs.log()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(0.4122, grad_fn=)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "-probs.log()[range(5), pred].mean()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import make_blobs\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "data = make_blobs(200, centers=[[-2, -2], [2, 2]])\n", "x, y = data" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.scatter(x[:, 0], x[:, 1])" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "step 0, loss 0.26484909653663635\n", "step 200, loss 0.08326209336519241\n", "step 400, loss 0.051809437572956085\n", "step 600, loss 0.038577087223529816\n", "step 800, loss 0.031144658103585243\n", "step 1000, loss 0.026323309168219566\n", "step 1200, loss 0.022914284840226173\n", "step 1400, loss 0.020361438393592834\n", "step 1600, loss 0.01836979016661644\n", "step 1800, loss 0.016767438501119614\n" ] } ], "source": [ "batch_size = 20\n", "max_steps = 2000\n", "learning_rate = 0.01\n", "x, y = torch.tensor(data[0]).float(), torch.tensor(data[1])\n", "lr = LogitRegression(2)\n", "lossi = []\n", "\n", "for t in range(max_steps):\n", " ix = (t * batch_size) % len(x)\n", " xx = x[ix: ix + batch_size]\n", " yy = y[ix: ix + batch_size] # (20)\n", " logits = lr(xx) # (20, 2)\n", " loss = F.cross_entropy(logits, yy)\n", " loss.backward()\n", " with torch.no_grad():\n", " for p in lr.parameters():\n", " p -= learning_rate * p.grad\n", " p.grad = None\n", " if t % 200 == 0:\n", " print(f'step {t}, loss {loss.item()}')\n", " lossi.append(loss.item())" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.plot(lossi)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }