- 1]
# [PSL] => [1, PSL] => [BS, PSL]
prefix_tokens = self.prefix_tokens.unsqueeze(0...).expand(batch_size, -1).to(device)
# [BS, PSL, KVS=NL * HS * 2GC]
past_key_values =...self.prefix_encoder(prefix_tokens).type(dtype)
# [BS, PSL, KVS=NL * HS * 2GC] => [BS, PSL, 2NL..., 2NL, GC, HS] => [2NL, PSL, BS, GC, HS] => NL * [2, PSL, BS, GC, HS]
past_key_values = past_key_values.permute...if self.pre_seq_len is not None:
# If no KV cache is provided, initialize it from the embeddings of the first PSL prefix tokens
if