From 30f522d1a0646593842733037f1b278ea0f1fe7a Mon Sep 17 00:00:00 2001
From: Artem Chumachenko
Date: Tue, 16 Apr 2024 19:25:29 +0200
Subject: [PATCH] Fix dummy cache allocation (#574)

* Fix dummy cache allocation

* Try mps device selecting

* Rechain reloc

---
 src/petals/server/throughput.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/petals/server/throughput.py b/src/petals/server/throughput.py
index d947179..c30d287 100644
--- a/src/petals/server/throughput.py
+++ b/src/petals/server/throughput.py
@@ -206,7 +206,7 @@ def measure_compute_rps(
     block = block.to(dtype)
     block = convert_block(block, 0, config, tensor_parallel_devices, device, quant_type=quant_type, freeze=True)
 
-    cache = (DUMMY_KEY_PAST.to(dtype), DUMMY_KEY_PAST.to(dtype))
+    cache = (DUMMY_KEY_PAST.to(dtype=dtype, device=device), DUMMY_KEY_PAST.to(dtype=dtype, device=device))
 
     elapsed = 0
     dummy_input = torch.randn(1, n_tokens, config.hidden_size, device=device, dtype=dtype)