@@ -45,7 +45,7 @@
},
"outputs": [],
"source": [
- "# 一些超参数\n",
+ "# Some hyperparameters\n",
"learning_rate = 6e-4\n",
"sequence_len = 1024\n",
"batch_size = 8\n",
@@ -64,7 +64,7 @@
"outputs": [],
"source": [
"tokenizer = AutoTokenizer.from_pretrained('gpt2')\n",
- "# 没有语言建模头的嵌入模型\n",
+ "# The embedding model without the language modeling head\n",
"model = GPT2Model.from_pretrained('gpt2')"
]
},
@@ -101,11 +101,11 @@
" re = {}\n",
" for i in range(2):\n",
" key = 'tokens_%s' % i\n",
- " # prefix和completion两个字段已经经过了分词处理\n",
+ " # The 'prefix' and 'completion' fields are already tokenized\n",
" re['input_ids_%s' % i] = data[key]['prefix'] + data[key]['completion']\n",
- " # 记录文本的实际长度,用于后续的模型计算\n",
+ " # Record the actual length of the text for later use in the model\n",
" re['input_len_%s' % i] = len(re['input_ids_%s' % i])\n",
- " # 根据数据说明,定义标签变量\n",
+ " # Define the label according to the score\n",
" re['label'] = 0 if data['score_0'] > 0 else 1\n",
" return re\n",
"\n",
@@ -156,15 +156,15 @@
"\n",
"def token_collect(batch):\n",
" '''\n",
- " 由于文本的长度不一,对于同一批次的训练数据,需要进行数据填充,使得长度一致\n",
+ " As the texts differ in length, sequences in the same batch need to be padded to the same length.\n",
" '''\n",
" re = {}\n",
" for i in range(2):\n",
" ids = [data['input_ids_%s' % i] for data in batch]\n",
- " # 对于较短的数据,用0在末尾进行填充\n",
+ " # Pad shorter sequences with 0 at the end\n",
" re['input_ids_%s' % i] = pad_sequence(ids, batch_first=True)\n",
" re['input_len_%s' % i] = torch.stack([data['input_len_%s' % i] for data in batch])\n",
- " # 将标签变量也合并成一个张量\n",
+ " # Stack the labels into a single tensor\n",
" re['label'] = torch.stack([data['label'] for data in batch])\n",
" return re"
]
@@ -210,11 +210,11 @@
"source": [
"from torch.utils.data import DataLoader, random_split\n",
"\n",
- "# 划分训练集和测试集\n",
+ "# Split the data into training and test sets\n",
"train_set, test_set = random_split(dataset, [0.8, 0.2])\n",
"train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, collate_fn=token_collect)\n",
"test_loader = DataLoader(test_set, batch_size=3, shuffle=True, collate_fn=token_collect)\n",
- "# 训练数据示例\n",
+ "# An example batch of training data\n",
"next(iter(train_loader))"
]
},
@@ -271,38 +271,40 @@
"\n",
" def __init__(self, model):\n",
" '''\n",
- " 评分模型\n",
- " 参数\n",
+ " Reward model\n",
+ " \n",
+ " Args:\n",
" ----\n",
- " model :嵌入模型\n",
+ " model: Embedding model\n",
" '''\n",
" super().__init__()\n",
" self.embedding = model\n",
- " # 评分建模头\n",
+ " # Score modeling head\n",
" self.score = nn.Linear(model.embed_dim, 1, bias=False)\n",
"\n",
" def forward(self, x, seq_len):\n",
" '''\n",
- " 向前传播\n",
- " 参数\n",
+ " Forward pass\n",
+ " \n",
+ " Args:\n",
" ----\n",
- " x :torch.LongTensor,文本,形状为(B, T)\n",
- " seq_len :torch.LongTensor,文本的实际长度,形状为(B)\n",
+ " x: torch.LongTensor, text, shape (B, T)\n",
+ " seq_len: torch.LongTensor, the length of the text before padding, shape (B)\n",
- " 返回\n",
+ " Returns:\n",
" ----\n",
- " score :torch.FloatTensor,评分,形状为(B, 1)\n",
+ " score: torch.FloatTensor, scores, shape (B, 1)\n",
" '''\n",
" B, _ = x.shape\n",
- " # 文本的嵌入向量\n",
+ " # The embedding of the text\n",
" emb = self.embedding(x).last_hidden_state # (B, T, C)\n",
" ind = torch.arange(B, device=seq_len.device)\n",
- " # 获取最后一个词元的特征\n",
+ " # Get the feature of the last token\n",
" pooled_emb = emb[ind, seq_len - 1] # (B, C)\n",
" score = self.score(pooled_emb) # (B, 1)\n",
" return score\n",
"\n",
"r_model = RewardModel(model)\n",
- "# 展示模型结构\n",
+ "# Show the model structure\n",
"r_model"
]
},
@@ -316,7 +318,7 @@
"source": [
"def print_trainable_parameters(model):\n",
" \"\"\"\n",
- " 输出模型中可供训练的参数个数\n",
+ " Print the number of trainable parameters\n",
" \"\"\"\n",
" trainable_params = 0\n",
" all_param = 0\n",
@@ -357,15 +359,15 @@
" lora_alpha=8,\n",
" target_modules=['c_attn'],\n",
" lora_dropout=0.4,\n",
- " # c_attn.weight的形状是(fan_in, fan_out),所以该参数设置为True\n",
- " # 但需注意的是,普通的线性模型权重参数的形状是(fan_out, fan_in)\n",
+ " # As the shape of c_attn.weight is (fan_in, fan_out), set this parameter to True\n",
+ " # Note: for an ordinary linear layer, the weight's shape is (fan_out, fan_in)\n",
" fan_in_fan_out=True,\n",
" bias='none',\n",
- " # 评分模型中的score层(评分建模头)也参与模型微调\n",
+ " # The score layer (score modeling head) also participates in fine-tuning\n",
" modules_to_save=['score']\n",
" )\n",
"\n",
- "# 为评分模型添加LoRA适配器\n",
+ "# Add a LoRA adapter to the reward model\n",
"r_model = PeftModel(r_model, config, adapter_name='lora')\n",
"print_trainable_parameters(r_model)"
]
@@ -447,26 +449,29 @@
"\n",
" def __init__(self, model):\n",
" '''\n",
- " 借鉴逻辑回归的思路,进行偏好建模\n",
- " 参数\n",
+ " Build the preference model by borrowing the idea of logistic regression\n",
+ " \n",
+ " Args:\n",
" ----\n",
- " model :评分模型\n",
+ " model: Reward model\n",
" '''\n",
" super().__init__()\n",
" self.pref = model\n",
"\n",
" def forward(self, data):\n",
" '''\n",
- " 定义模型损失\n",
- " 参数\n",
+ " Define the model loss\n",
+ " \n",
+ " Args:\n",
" ----\n",
- " data :dict,训练数据\n",
- " 返回\n",
+ " data: dict, training data\n",
+ " \n",
+ " Returns:\n",
" ----\n",
- " out :torch.FloatTensor,logits,形状为(B, 2)\n",
- " loss :torch.FloatTensor,模型损失\n",
+ " out: torch.FloatTensor, the logits, shape (B, 2)\n",
+ " loss: torch.FloatTensor, the model loss\n",
" '''\n",
- " # input0的形状是(B, T),len0的形状是(B)\n",
+ " # The shape of input0 is (B, T), the shape of len0 is (B)\n",
" input0, len0 = data['input_ids_0'], data['input_len_0']\n",
" input1, len1 = data['input_ids_1'], data['input_len_1']\n",
" score0 = self.pref(input0, len0) # (B, 1)\n",
@@ -476,7 +481,7 @@
" return out, loss\n",
"\n",
"p_model = PreferenceModel(r_model).to(device)\n",
- "# 模型结构\n",
+ "# The model structure\n",
"p_model"
]
},
@@ -501,7 +506,7 @@
}
],
"source": [
- "# 利用示例数据验证模型是否搭建正确,并记录微调前的模型效果(方便与后续结果做对比)\n",
+ "# Use an example to verify the model is built correctly, and record its output before fine-tuning for later comparison\n",
"example = test_set[:1]\n",
"with torch.no_grad():\n",
" p_model.eval()\n",
@@ -537,28 +542,29 @@
"\n",
"def estimate_loss(model, ctx=nullcontext()):\n",
" '''\n",
- " 估计模型损失\n",
- " ctx参数是为禁用LoRA或者混合精度做准备,当ctx=nullcontext()时,没有任何作用\n",
+ " Estimate the model loss.\n",
+ " Note: ctx is used for disabling LoRA or for mixed precision training.\n",
+ " When ctx=nullcontext(), it has no effect.\n",
" '''\n",
" re = {}\n",
- " # 将模型切换至评估模式\n",
+ " # Switch the model to evaluation mode\n",
" model.eval()\n",
" re['train'] = _loss(model, train_loader, ctx)\n",
" re['test'] = _loss(model, test_loader, ctx)\n",
- " # 将模型切换至训练模式\n",
+ " # Switch the model back to training mode\n",
" model.train()\n",
" return re\n",
"\n",
"@torch.no_grad()\n",
"def _loss(model, data_loader, ctx):\n",
" \"\"\"\n",
- " 计算模型在不同数据集下面的评估指标\n",
+ " Compute the evaluation metric of the model on different data sets.\n",
" \"\"\"\n",
" lossi = []\n",
" data_iter= iter(data_loader)\n",
- " # 随机使用多个批量数据来预估模型效果\n",
+ " # Use eval_iters random batches to estimate the performance\n",
" for k in range(eval_iters):\n",
- " # 如果数据遍历完了,则重新生成一个data loader\n",
+ " # If the data is exhausted, create a new data loader\n",
" data = next(data_iter, None)\n",
" if data is None:\n",
" data_iter = iter(data_loader)\n",
@@ -579,7 +585,7 @@
},
"outputs": [],
"source": [
- "# get_lr的实现参考自https://github.com/karpathy/nanoGPT/blob/master/train.py\n",
+ "# The code of get_lr is inspired by https://github.com/karpathy/nanoGPT/blob/master/train.py\n",
"import math\n",
"\n",
"warmup_iters = 100\n",
@@ -588,16 +594,16 @@
"\n",
"def get_lr(it):\n",
" '''\n",
- " 动态调整学习速率\n",
- " it表示训练次数\n",
+ " Adjust the learning rate dynamically\n",
+ " it is the current training iteration\n",
" '''\n",
- " # 1、线性预热\n",
+ " # 1. Linear warmup\n",
" if it < warmup_iters:\n",
" return learning_rate * it / warmup_iters\n",
- " # 2、超出lr_decay_iters,则返回min_lr\n",
+ " # 2. If it exceeds lr_decay_iters, return min_lr\n",
" if it > lr_decay_iters:\n",
" return min_lr\n",
- " # 3、逐步衰减学习速率\n",
+ " # 3. Gradually decay the learning rate\n",
" decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)\n",
" assert 0 <= decay_ratio <= 1\n",
" coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))\n",
@@ -612,7 +618,7 @@
},
"outputs": [],
"source": [
- "# 梯度裁剪的超参数\n",
+ "# The hyperparameter for gradient clipping\n",
"grad_clip = 1.0\n",
"\n",
"def train_reward_optimum(model, optimizer, data_loader, max_iters=1000):\n",
@@ -621,25 +627,25 @@
" data_iter = iter(data_loader)\n",
"\n",
" for iter_num in range(max_iters):\n",
- " # 动态调整学习率\n",
+ " # Adjust the learning rate dynamically\n",
" lr = get_lr(iter_num + 1)\n",
" for param_group in optimizer.param_groups:\n",
" param_group['lr'] = lr\n",
+ " # Gradient accumulation\n",
" for i in range(gra_acc_steps):\n",
" data = next(data_iter, None)\n",
" if data is None:\n",
" data_iter = iter(data_loader)\n",
" data = next(data_iter, None)\n",
- " # 混合进度训练\n",
- " ## 如果是用CPU进行计算,可能需要将dtype变成torch.bfloat16\n",
- " ## 当然如果使用CPU,需要非常长的时间\n",
+ " # Mixed precision training\n",
+ " # If using a CPU, you may need to change dtype to torch.bfloat16 (and training will be very slow)\n",
" ctx = torch.autocast(device_type=device, dtype=torch.float16)\n",
" with ctx:\n",
" _, loss = model(data)\n",
" lossi.append(loss.item())\n",
" loss *= 1 / gra_acc_steps\n",
" scaler.scale(loss).backward()\n",
- " # 梯度裁剪\n",
+ " # Gradient clipping\n",
" scaler.unscale_(optimizer)\n",
" clip_grad_norm_(model.parameters(), grad_clip)\n",
" scaler.step(optimizer)\n",
@@ -647,7 +653,7 @@
" optimizer.zero_grad(set_to_none=True)\n",
"\n",
" if iter_num % eval_interval == 0:\n",
- " # 预估模型损失时,也使用混合精度\n",
+ " # Also use mixed precision when estimating the model loss\n",
" stats = estimate_loss(model, ctx)\n",
" train_loss = f'train loss {stats[\"train\"]:.4f}'\n",
" eval_loss = f'test loss {stats[\"test\"]:.4f}'\n",
@@ -694,7 +700,7 @@
}
],
"source": [
- "# 设置最优化算法的参数\n",
+ "# Parameters for the AdamW optimizer\n",
"weight_decay = 1e-1\n",
"beta1 = 0.9\n",
"beta2 = 0.95\n",
@@ -761,7 +767,7 @@
}
],
"source": [
- "# 经过模型微调之后,评分模型的效果有所提升\n",
+ "# After fine-tuning, the performance of the reward model has improved\n",
"with torch.no_grad():\n",
" p_model.eval()\n",
" print(p_model(example), example['label'])\n",
|