@@ -45,6 +45,7 @@
   },
   "outputs": [],
   "source": [
+   "# Some hyperparameters\n",
    "learning_rate = 6e-4\n",
    "sequence_len = 1024\n",
    "batch_size = 8\n",
@@ -63,6 +64,7 @@
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained('gpt2')\n",
+   "# The embedding model, without the language-modeling head\n",
    "model = GPT2Model.from_pretrained('gpt2')"
   ]
  },
@@ -93,11 +95,17 @@
   ],
   "source": [
    "def process(data):\n",
+   "    '''\n",
+   "    Generate the training texts and the label variable\n",
+   "    '''\n",
    "    re = {}\n",
    "    for i in range(2):\n",
    "        key = 'tokens_%s' % i\n",
+   "        # The prefix and completion fields are already tokenized\n",
    "        re['input_ids_%s' % i] = data[key]['prefix'] + data[key]['completion']\n",
+   "        # Record the actual text length for later model computations\n",
    "        re['input_len_%s' % i] = len(re['input_ids_%s' % i])\n",
+   "    # Define the label variable according to the dataset documentation\n",
    "    re['label'] = 0 if data['score_0'] > 0 else 1\n",
    "    return re\n",
    "\n",
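
For reference, here is a made-up record run through the `process` function above. The field values are hypothetical; only the structure matches what the function expects:

```python
# Hypothetical record; real entries hold tokenized prefix/completion ids.
record = {
    'tokens_0': {'prefix': [1, 2], 'completion': [3]},
    'tokens_1': {'prefix': [1, 2], 'completion': [4, 5]},
    'score_0': 1.0,
}
print(process(record))
# {'input_ids_0': [1, 2, 3], 'input_len_0': 3,
#  'input_ids_1': [1, 2, 4, 5], 'input_len_1': 4, 'label': 0}
```
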
@@ -147,11 +155,16 @@
    "from torch.nn.utils.rnn import pad_sequence\n",
    "\n",
    "def token_collect(batch):\n",
+   "    '''\n",
+   "    The texts vary in length, so the examples within a batch must be padded to a common length\n",
+   "    '''\n",
    "    re = {}\n",
    "    for i in range(2):\n",
    "        ids = [data['input_ids_%s' % i] for data in batch]\n",
+   "        # Pad shorter sequences with zeros at the end\n",
    "        re['input_ids_%s' % i] = pad_sequence(ids, batch_first=True)\n",
    "        re['input_len_%s' % i] = torch.stack([data['input_len_%s' % i] for data in batch])\n",
+   "    # Stack the labels into a single tensor as well\n",
    "    re['label'] = torch.stack([data['label'] for data in batch])\n",
    "    return re"
   ]
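
A minimal sketch of the padding behavior `token_collect` relies on: `pad_sequence` appends zeros so every sequence in a batch reaches the length of the longest one:

```python
import torch
from torch.nn.utils.rnn import pad_sequence

ids = [torch.tensor([5, 6, 7]), torch.tensor([8, 9])]
padded = pad_sequence(ids, batch_first=True)  # pads with 0 at the end
print(padded)        # tensor([[5, 6, 7], [8, 9, 0]])
print(padded.shape)  # torch.Size([2, 3])
```
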
@@ -197,9 +210,11 @@
   "source": [
    "from torch.utils.data import DataLoader, random_split\n",
    "\n",
+   "# Split into training and test sets\n",
    "train_set, test_set = random_split(dataset, [0.8, 0.2])\n",
    "train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, collate_fn=token_collect)\n",
    "test_loader = DataLoader(test_set, batch_size=3, shuffle=True, collate_fn=token_collect)\n",
+   "# An example batch of training data\n",
    "next(iter(train_loader))"
   ]
  },
@@ -255,13 +270,30 @@
    "class RewardModel(nn.Module):\n",
    "\n",
    "    def __init__(self, model):\n",
+   "        '''\n",
+   "        The reward (scoring) model\n",
+   "        Parameters\n",
+   "        ----\n",
+   "        model : the embedding model\n",
+   "        '''\n",
    "        super().__init__()\n",
    "        self.embedding = model\n",
+   "        # The scoring head\n",
    "        self.score = nn.Linear(model.embed_dim, 1, bias=False)\n",
    "\n",
    "    def forward(self, x, seq_len):\n",
-   "        # x: the texts, of shape (B, T); seq_len: the text lengths, of shape (B)\n",
+   "        '''\n",
+   "        Forward pass\n",
+   "        Parameters\n",
+   "        ----\n",
+   "        x       : torch.LongTensor, the texts, of shape (B, T)\n",
+   "        seq_len : torch.LongTensor, the actual text lengths, of shape (B)\n",
+   "        Returns\n",
+   "        ----\n",
+   "        score   : torch.FloatTensor, the scores, of shape (B, 1)\n",
+   "        '''\n",
    "        B, _ = x.shape\n",
+   "        # The text embeddings\n",
    "        emb = self.embedding(x).last_hidden_state  # (B, T, C)\n",
    "        ind = torch.arange(B, device=seq_len.device)\n",
    "        # Extract the features of the last token\n",
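
A standalone sketch of the last-token gather that `forward` performs (random numbers stand in for the GPT-2 hidden states; the indexing is the point). Because padding is appended at the end, position `seq_len - 1` is each text's last real token:

```python
import torch

B, T, C = 2, 4, 3
emb = torch.randn(B, T, C)      # stand-in for last_hidden_state, (B, T, C)
seq_len = torch.tensor([4, 2])  # actual lengths of the two texts
ind = torch.arange(B)
last = emb[ind, seq_len - 1]    # features of each text's last real token
print(last.shape)               # torch.Size([2, 3])
```
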
@@ -270,6 +302,7 @@
    "        return score\n",
    "\n",
    "r_model = RewardModel(model)\n",
+   "# Show the model structure\n",
    "r_model"
   ]
  },
@@ -324,11 +357,15 @@
    "    lora_alpha=8,\n",
    "    target_modules=['c_attn'],\n",
    "    lora_dropout=0.4,\n",
+   "    # c_attn.weight has shape (fan_in, fan_out), so this flag is set to True\n",
+   "    # Note, however, that an ordinary linear layer's weight has shape (fan_out, fan_in)\n",
    "    fan_in_fan_out=True,\n",
    "    bias='none',\n",
+   "    # The score layer (the scoring head) of the reward model is fine-tuned as well\n",
    "    modules_to_save=['score']\n",
    ")\n",
    "\n",
+   "# Add a LoRA adapter to the reward model\n",
    "r_model = PeftModel(r_model, config, adapter_name='lora')\n",
    "print_trainable_parameters(r_model)"
   ]
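
The `fan_in_fan_out` remark can be checked directly: GPT-2's `c_attn` is a `transformers` `Conv1D`, whose weight is stored transposed relative to `nn.Linear`. A quick sketch:

```python
from transformers import GPT2Model

m = GPT2Model.from_pretrained('gpt2')
c_attn = m.h[0].attn.c_attn
print(type(c_attn).__name__)  # Conv1D
print(c_attn.weight.shape)    # torch.Size([768, 2304]), i.e. (fan_in, fan_out)
```
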
@@ -409,19 +446,37 @@
    "class PreferenceModel(nn.Module):\n",
    "\n",
    "    def __init__(self, model):\n",
+   "        '''\n",
+   "        Model the preferences, borrowing the idea of logistic regression\n",
+   "        Parameters\n",
+   "        ----\n",
+   "        model : the reward model\n",
+   "        '''\n",
    "        super().__init__()\n",
    "        self.pref = model\n",
    "\n",
    "    def forward(self, data):\n",
+   "        '''\n",
+   "        Define the model loss\n",
+   "        Parameters\n",
+   "        ----\n",
+   "        data : dict, the training data\n",
+   "        Returns\n",
+   "        ----\n",
+   "        out  : torch.FloatTensor, logits, of shape (B, 2)\n",
+   "        loss : torch.FloatTensor, the model loss\n",
+   "        '''\n",
+   "        # input0 has shape (B, T); len0 has shape (B)\n",
    "        input0, len0 = data['input_ids_0'], data['input_len_0']\n",
    "        input1, len1 = data['input_ids_1'], data['input_len_1']\n",
-   "        score0 = self.pref(input0, len0)\n",
-   "        score1 = self.pref(input1, len1)\n",
-   "        out = torch.concat((score0, score1), dim=1)\n",
+   "        score0 = self.pref(input0, len0)    # (B, 1)\n",
+   "        score1 = self.pref(input1, len1)    # (B, 1)\n",
+   "        out = torch.concat((score0, score1), dim=1)    # (B, 2)\n",
    "        loss = F.cross_entropy(out, data['label'])\n",
    "        return out, loss\n",
    "\n",
    "p_model = PreferenceModel(r_model).to(device)\n",
+   "# The model structure\n",
    "p_model"
   ]
  },
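
Note that the two-way cross-entropy over the concatenated scores is exactly the pairwise Bradley-Terry loss -log σ(s_winner − s_loser). A quick numerical check:

```python
import torch
import torch.nn.functional as F

score0 = torch.tensor([[1.3]])
score1 = torch.tensor([[0.2]])
out = torch.concat((score0, score1), dim=1)  # (1, 2)
label = torch.tensor([0])                    # response 0 preferred
ce = F.cross_entropy(out, label)
bt = -F.logsigmoid(score0 - score1).mean()   # Bradley-Terry form
print(torch.allclose(ce, bt))                # True
```
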
@@ -446,6 +501,7 @@
    }
   ],
   "source": [
+   "# Verify the model is built correctly on an example, and record its performance before fine-tuning (for later comparison)\n",
    "example = test_set[:1]\n",
    "with torch.no_grad():\n",
    "    p_model.eval()\n",
@@ -480,6 +536,10 @@
    "from contextlib import nullcontext\n",
    "\n",
    "def estimate_loss(model, ctx=nullcontext()):\n",
+   "    '''\n",
+   "    Estimate the model loss\n",
+   "    The ctx argument is there to allow disabling LoRA or enabling mixed precision; with ctx=nullcontext() it has no effect\n",
+   "    '''\n",
    "    re = {}\n",
    "    # Switch the model to evaluation mode\n",
    "    model.eval()\n",
@@ -496,7 +556,9 @@
    "        \"\"\"\n",
    "        lossi = []\n",
    "        data_iter = iter(data_loader)\n",
+   "        # Estimate model performance on several randomly drawn batches\n",
    "        for k in range(eval_iters):\n",
+   "            # If the data is exhausted, create a new data loader\n",
    "            data = next(data_iter, None)\n",
    "            if data is None:\n",
    "                data_iter = iter(data_loader)\n",
@@ -517,7 +579,7 @@
  },
   "outputs": [],
   "source": [
-   "# Adapted from https://github.com/karpathy/nanoGPT/blob/master/train.py\n",
+   "# The implementation of get_lr is adapted from https://github.com/karpathy/nanoGPT/blob/master/train.py\n",
    "import math\n",
    "\n",
    "warmup_iters = 100\n",
@@ -525,6 +587,10 @@
    "min_lr = learning_rate / 10\n",
    "\n",
    "def get_lr(it):\n",
+   "    '''\n",
+   "    Dynamically adjust the learning rate\n",
+   "    it is the iteration number\n",
+   "    '''\n",
    "    # 1. Linear warm-up\n",
    "    if it < warmup_iters:\n",
    "        return learning_rate * it / warmup_iters\n",
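
The decay branch of `get_lr` falls outside the diff context. For completeness, a sketch of the full warm-up plus cosine-decay schedule, following the referenced nanoGPT `train.py` (`lr_decay_iters = 1000` is an assumed value):

```python
import math

learning_rate = 6e-4
min_lr = learning_rate / 10
warmup_iters = 100
lr_decay_iters = 1000  # assumed; not visible in the diff

def get_lr(it):
    # 1. Linear warm-up
    if it < warmup_iters:
        return learning_rate * it / warmup_iters
    # 2. Constant floor after the decay window
    if it > lr_decay_iters:
        return min_lr
    # 3. Cosine decay from learning_rate down to min_lr
    ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
    coeff = 0.5 * (1.0 + math.cos(math.pi * ratio))
    return min_lr + coeff * (learning_rate - min_lr)
```
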
@@ -546,6 +612,7 @@
  },
   "outputs": [],
   "source": [
+   "# Hyperparameter for gradient clipping\n",
    "grad_clip = 1.0\n",
    "\n",
    "def train_reward_optimum(model, optimizer, data_loader, max_iters=1000):\n",
@@ -580,6 +647,7 @@
    "        optimizer.zero_grad(set_to_none=True)\n",
    "\n",
    "        if iter_num % eval_interval == 0:\n",
+   "            # Use mixed precision when estimating the loss as well\n",
    "            stats = estimate_loss(model, ctx)\n",
    "            train_loss = f'train loss {stats[\"train\"]:.4f}'\n",
    "            eval_loss = f'test loss {stats[\"test\"]:.4f}'\n",
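
A minimal sketch of how `grad_clip` is typically applied between `backward()` and `step()`. This is an assumption about the body of `train_reward_optimum`, which the diff does not show; it matches the nanoGPT loop the notebook follows:

```python
import torch

layer = torch.nn.Linear(4, 1)  # stand-in for the model being trained
loss = layer(torch.randn(8, 4)).pow(2).mean()
loss.backward()
# Rescale gradients so their global norm does not exceed grad_clip
torch.nn.utils.clip_grad_norm_(layer.parameters(), max_norm=1.0)
```
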
@@ -626,6 +694,7 @@
    }
   ],
   "source": [
+   "# Set the optimizer's hyperparameters\n",
    "weight_decay = 1e-1\n",
    "beta1 = 0.9\n",
    "beta2 = 0.95\n",
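
These are the usual AdamW arguments. A self-contained sketch of the optimizer they presumably feed (the construction itself is outside the diff context; the stand-in model is hypothetical):

```python
import torch

weight_decay, beta1, beta2 = 1e-1, 0.9, 0.95
model = torch.nn.Linear(4, 1)  # stand-in for the reward model
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=6e-4,                    # learning_rate from the first cell
    betas=(beta1, beta2),
    weight_decay=weight_decay,
)
```
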
@@ -692,6 +761,7 @@
    }
   ],
   "source": [
+   "# After fine-tuning, the reward model performs better\n",
    "with torch.no_grad():\n",
    "    p_model.eval()\n",
    "    print(p_model(example), example['label'])\n",