Searched refs: n_head (Results 1 – 5 of 5) sorted by relevance
/aosp_15_r20/external/pytorch/benchmarks/gpt_fast/
model.py
     22  n_head: int = 32                    (variable in ModelArgs)
     32  self.n_local_heads = self.n_head
     37  self.head_dim = self.dim // self.n_head
     65  "7B": dict(n_layer=32, n_head=32, dim=4096),
     66  "13B": dict(n_layer=40, n_head=40, dim=5120),
     67  "30B": dict(n_layer=60, n_head=52, dim=6656),
     70  n_head=64,
     78  n_layer=80, n_head=64, dim=8192, n_local_heads=8, intermediate_size=28672
     82  n_head=32,
    135  head_dim = self.config.dim // self.config.n_head
    [all …]
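The matches above show how gpt_fast derives per-head sizes from a config. A minimal sketch of how those fields fit together (the __post_init__ placement and the -1 sentinel for n_local_heads are assumptions; the field values and formulas come from the matched lines):

    from dataclasses import dataclass

    @dataclass
    class ModelArgs:
        dim: int = 4096
        n_layer: int = 32
        n_head: int = 32          # match at line 22
        n_local_heads: int = -1   # assumed sentinel meaning "same as n_head"

        def __post_init__(self):
            if self.n_local_heads == -1:
                self.n_local_heads = self.n_head       # match at line 32
            self.head_dim = self.dim // self.n_head    # match at line 37

    # Named size presets from the matches at lines 65-67:
    transformer_configs = {
        "7B": dict(n_layer=32, n_head=32, dim=4096),
        "13B": dict(n_layer=40, n_head=40, dim=5120),
        "30B": dict(n_layer=60, n_head=52, dim=6656),
    }

    args = ModelArgs(**transformer_configs["7B"])
    assert args.head_dim == 128  # 4096 // 32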
mixtral_moe_model.py
     22  n_head: int = 32                    (variable in ModelArgs)
     34  self.n_local_heads = self.n_head
     39  self.head_dim = self.dim // self.n_head
     59  n_head=32,
    114  head_dim = self.config.dim // self.config.n_head
    125  self.config.dim // self.config.n_head,
    168  assert config.dim % config.n_head == 0
    170  total_head_dim = (config.n_head + 2 * config.n_local_heads) * config.head_dim
    176  self.n_head = config.n_head
    201  q = q.view(bsz, seqlen, self.n_head, self.head_dim)
    [all …]
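The matches at lines 168-201 outline a grouped-query attention projection: n_head query heads share n_local_heads key/value heads, so one fused linear is sized at (n_head + 2 * n_local_heads) * head_dim. A sketch under that reading (the wqkv name, split layout, and forward body are assumptions, not the file's code):

    import torch
    import torch.nn as nn

    class Attention(nn.Module):
        def __init__(self, dim, n_head, n_local_heads):
            super().__init__()
            assert dim % n_head == 0                   # match at line 168
            head_dim = dim // n_head
            # One fused projection covering n_head query heads plus
            # n_local_heads key heads and n_local_heads value heads.
            total_head_dim = (n_head + 2 * n_local_heads) * head_dim
            self.wqkv = nn.Linear(dim, total_head_dim, bias=False)
            self.n_head = n_head                       # match at line 176
            self.n_local_heads = n_local_heads
            self.head_dim = head_dim

        def forward(self, x):
            bsz, seqlen, _ = x.shape
            kv_size = self.n_local_heads * self.head_dim
            q, k, v = self.wqkv(x).split(
                [self.n_head * self.head_dim, kv_size, kv_size], dim=-1
            )
            q = q.view(bsz, seqlen, self.n_head, self.head_dim)  # match at line 201
            k = k.view(bsz, seqlen, self.n_local_heads, self.head_dim)
            v = v.view(bsz, seqlen, self.n_local_heads, self.head_dim)
            return q, k, v

    q, k, v = Attention(dim=4096, n_head=32, n_local_heads=8)(torch.randn(2, 16, 4096))
    assert q.shape == (2, 16, 32, 128) and k.shape == (2, 16, 8, 128)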
/aosp_15_r20/external/pytorch/test/
test_transformers.py
   1810  n_head,                             (argument)
   1824  n_embd = n_head * head_dim
   1826  shape = SdpaShape(batch_size, n_head, seq_len, head_dim)
   1843  k = k.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
   1844  q = q.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
   1845  v = v.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
   1846  k2 = k2.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
   1847  q2 = q2.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
   1848  v2 = v2.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
   1893  n_head,                             (argument)
   [all …]
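The test reshapes flat (batch, seq, n_embd) tensors into the (batch, n_head, seq, head_dim) layout that scaled_dot_product_attention expects. A self-contained sketch of that reshape (the sizes are arbitrary; SdpaShape is the test's own helper and is not used below):

    import torch
    import torch.nn.functional as F

    batch_size, seq_len, n_head, head_dim = 2, 16, 4, 8  # arbitrary toy sizes
    n_embd = n_head * head_dim                           # as in match line 1824

    q = torch.randn(batch_size, seq_len, n_embd)
    k = torch.randn(batch_size, seq_len, n_embd)
    v = torch.randn(batch_size, seq_len, n_embd)

    # Split the embedding into heads, then move the head axis ahead of the
    # sequence axis: (batch, seq, n_head, head_dim) -> (batch, n_head, seq, head_dim).
    q = q.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
    k = k.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
    v = v.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)

    out = F.scaled_dot_product_attention(q, k, v)
    assert out.shape == (batch_size, n_head, seq_len, head_dim)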
/aosp_15_r20/external/pytorch/torch/_inductor/fx_passes/
fuse_attention.py
    383  n_head = query.size(2)
    388  (attn_mask == 1).view((bs, 1, 1, k_len)).expand((bs, n_head, q_len, k_len))
    453  n_head = query.size(2)
    458  (attn_mask == 1).view((bs, 1, 1, k_len)).expand((bs, n_head, q_len, k_len))
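Both matched sites broadcast a per-key padding mask across every head and query position as part of the fused-attention pattern. A sketch of that expansion with toy sizes (all shapes below are assumptions for illustration):

    import torch

    bs, q_len, k_len, head_dim = 2, 5, 5, 8      # arbitrary toy sizes
    query = torch.randn(bs, q_len, 4, head_dim)  # heads on dim 2, as in the pattern
    attn_mask = torch.randint(0, 2, (bs, k_len)) # 1 = attend, 0 = masked

    n_head = query.size(2)                       # match at line 383
    # Insert singleton head and query axes, then broadcast the per-key
    # boolean mask to every head and every query position.
    bool_mask = (attn_mask == 1).view((bs, 1, 1, k_len)).expand((bs, n_head, q_len, k_len))
    assert bool_mask.shape == (bs, n_head, q_len, k_len)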
/aosp_15_r20/external/pytorch/test/onnx/
test_fx_to_onnx_with_onnxruntime.py
   1328  vocab_size=8096, n_positions=256, n_embd=256, n_layer=2, n_head=2
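The kwargs in this match line up with transformers.GPT2Config. Assuming that is the model under test, a two-head GPT-2 this small can be instantiated and run like so (the model class and forward call are assumptions, not the test's code):

    import torch
    from transformers import GPT2Config, GPT2Model

    # Config values copied from match line 1328; each of the 2 heads
    # works on 256 // 2 = 128 dimensions.
    config = GPT2Config(vocab_size=8096, n_positions=256, n_embd=256, n_layer=2, n_head=2)
    model = GPT2Model(config).eval()

    input_ids = torch.randint(0, config.vocab_size, (1, 8))
    with torch.no_grad():
        out = model(input_ids).last_hidden_state
    assert out.shape == (1, 8, config.n_embd)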