class RewardModel(nn.Module):
    """Scalar reward head on top of a backbone language model.

    The backbone produces per-token hidden states; the hidden state of the
    last token of every sequence is projected to one scalar score by a
    bias-free linear layer.
    """

    def __init__(self, model):
        """Wrap ``model`` and create the scoring layer.

        Args:
            model: backbone module. It is called as ``model(x)`` or
                ``model(inputs_embeds=...)`` and must return an object with a
                ``last_hidden_state`` attribute. Its hidden size is read from
                ``model.embed_dim`` when present, otherwise from
                ``model.config.hidden_size``.
        """
        super().__init__()
        self.embedding = model
        # The original code required `model.embed_dim`; fall back to the
        # config's hidden size so backbones without that attribute also work.
        embed_dim = getattr(model, "embed_dim", None)
        if embed_dim is None:
            embed_dim = model.config.hidden_size
        self.score = nn.Linear(embed_dim, 1, bias=False)

    def forward(self, x, seq_len=None):
        """Score each sequence in the batch.

        Args:
            x: the text, shape (B, T) for token ids or (B, T, vs) for
                per-token distributions over the vocabulary.
            seq_len: optional length of each text, shape (B). A tensor or any
                sequence of ints is accepted. When omitted, every sequence is
                treated as having the full length T.

        Returns:
            Tensor of shape (B, 1) holding one score per sequence.
        """
        B = x.shape[0]
        T = x.shape[1]
        emb = self.get_last_hidden_state(x)  # (B, T, C)
        # Build both index tensors on the device of the tensor being indexed.
        batch_idx = torch.arange(B, device=emb.device)
        if seq_len is None:
            last_idx = torch.full((B,), T - 1, dtype=torch.long, device=emb.device)
        else:
            last_idx = torch.as_tensor(seq_len, device=emb.device) - 1
        # Take the features of the last token of every sequence.
        pooled_emb = emb[batch_idx, last_idx]  # (B, C)
        score = self.score(pooled_emb)  # (B, 1)
        return score

    def get_last_hidden_state(self, x):
        """Run the backbone and return its ``last_hidden_state``, (B, T, C).

        A 2-D ``x`` (B, T) is passed to the backbone directly. Any other rank
        is treated as (B, T, vs) and multiplied with the backbone's input
        embedding matrix first; this prepares for later use with
        gumbel_softmax.
        """
        if x.dim() == 2:
            # x shape = (B, T)
            return self.embedding(x).last_hidden_state  # (B, T, C)
        # x shape = (B, T, vs): compute directly with the embedding parameters.
        w = self.embedding.get_input_embeddings().weight  # (vs, C)
        inputs_embeds = x @ w  # (B, T, C)
        return self.embedding(inputs_embeds=inputs_embeds).last_hidden_state