{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<torch._C.Generator at 0x7fc57cc63110>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from transformers import AutoModel, AutoTokenizer, GPT2LMHeadModel, GPT2Model\n",
    "\n",
    "\n",
    "torch.manual_seed(12046)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 6e-4\n",
    "sequence_len = 1024\n",
    "batch_size = 8\n",
    "gra_acc_steps = 8 * 2\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "eval_iters = 64 * 2\n",
    "eval_interval = 50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = GPT2LMHeadModel.from_pretrained('gpt2')\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GPT2LMHeadModel(\n",
       "  (transformer): GPT2Model(\n",
       "    (wte): Embedding(50257, 768)\n",
       "    (wpe): Embedding(1024, 768)\n",
       "    (drop): Dropout(p=0.1, inplace=False)\n",
       "    (h): ModuleList(\n",
       "      (0-11): 12 x GPT2Block(\n",
       "        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "        (attn): GPT2Attention(\n",
       "          (c_attn): Conv1D()\n",
       "          (c_proj): Conv1D()\n",
       "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
       "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
       "        )\n",
       "        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "        (mlp): GPT2MLP(\n",
       "          (c_fc): Conv1D()\n",
       "          (c_proj): Conv1D()\n",
       "          (act): NewGELUActivation()\n",
       "          (dropout): Dropout(p=0.1, inplace=False)\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "  )\n",
       "  (lm_head): Linear(in_features=768, out_features=50257, bias=False)\n",
       ")"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "class RewardModel(nn.Module):\n",
    "\n",
    "    def __init__(self, model):\n",
    "        super().__init__()\n",
    "        self.embedding = model\n",
    "        self.score = nn.Linear(model.embed_dim, 1, bias=False)\n",
    "\n",
    "    def forward(self, x, seq_len=None):\n",
    "        # x：表示文本，形状(B, T, vs)或者(B, T), seq_len：表示文本长度，形状(B)\n",
    "        B = x.shape[0]\n",
    "        T = x.shape[1]\n",
    "        emb = self.get_last_hidden_state(x)     # (B, T, C)\n",
    "        ind = torch.arange(B, device=x.device)\n",
    "        if seq_len == None:\n",
    "            seq_len = torch.tensor([T] * B)\n",
    "        # 获取最后一个词元的特征\n",
    "        pooled_emb = emb[ind, seq_len - 1]      # (B,    C)\n",
    "        score = self.score(pooled_emb)          # (B,    1)\n",
    "        return score\n",
    "    \n",
    "    def get_last_hidden_state(self, x):\n",
    "        if len(x.shape) == 2:\n",
    "            # x shape = (B, T)\n",
    "            emb = self.embedding(x).last_hidden_state  # (B, T, C)\n",
    "        # 为后面使用gumbel_softmax做准备，直接与embedding的模型参数进行计算\n",
    "        else:\n",
    "            # x shape = (B, T, vs)\n",
    "            w = self.embedding.get_input_embeddings().weight  # (vs, C)\n",
    "            inputs_embeds = x @ w  # (B, T, C)\n",
    "            emb = self.embedding(inputs_embeds=inputs_embeds).last_hidden_state\n",
    "        return emb\n",
    "\n",
    "r_model = RewardModel(GPT2Model.from_pretrained('gpt2'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}