Home
last modified time | relevance | path

Searched refs: n_head (Results 1 – 5 of 5) sorted by relevance

/aosp_15_r20/external/pytorch/benchmarks/gpt_fast/
H A Dmodel.py22 n_head: int = 32 variable in ModelArgs
32 self.n_local_heads = self.n_head
37 self.head_dim = self.dim // self.n_head
65 "7B": dict(n_layer=32, n_head=32, dim=4096),
66 "13B": dict(n_layer=40, n_head=40, dim=5120),
67 "30B": dict(n_layer=60, n_head=52, dim=6656),
70 n_head=64,
78 n_layer=80, n_head=64, dim=8192, n_local_heads=8, intermediate_size=28672
82 n_head=32,
135 head_dim = self.config.dim // self.config.n_head
[all …]
H A Dmixtral_moe_model.py22 n_head: int = 32 variable in ModelArgs
34 self.n_local_heads = self.n_head
39 self.head_dim = self.dim // self.n_head
59 n_head=32,
114 head_dim = self.config.dim // self.config.n_head
125 self.config.dim // self.config.n_head,
168 assert config.dim % config.n_head == 0
170 total_head_dim = (config.n_head + 2 * config.n_local_heads) * config.head_dim
176 self.n_head = config.n_head
201 q = q.view(bsz, seqlen, self.n_head, self.head_dim)
[all …]
/aosp_15_r20/external/pytorch/test/
H A Dtest_transformers.py1810 n_head, argument
1824 n_embd = n_head * head_dim
1826 shape = SdpaShape(batch_size, n_head, seq_len, head_dim)
1843 k = k.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
1844 q = q.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
1845 v = v.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
1846 k2 = k2.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
1847 q2 = q2.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
1848 v2 = v2.view(batch_size, seq_len, n_head, head_dim).transpose(1, 2)
1893 n_head, argument
[all …]
/aosp_15_r20/external/pytorch/torch/_inductor/fx_passes/
H A Dfuse_attention.py383 n_head = query.size(2)
388 (attn_mask == 1).view((bs, 1, 1, k_len)).expand((bs, n_head, q_len, k_len))
453 n_head = query.size(2)
458 (attn_mask == 1).view((bs, 1, 1, k_len)).expand((bs, n_head, q_len, k_len))
/aosp_15_r20/external/pytorch/test/onnx/
H A Dtest_fx_to_onnx_with_onnxruntime.py1328 vocab_size=8096, n_positions=256, n_embd=256, n_layer=2, n_head=2