2 changes: 1 addition & 1 deletion test/dtypes/test_nf4.py
@@ -262,7 +262,7 @@ def test_smoketest_linear(self, dtype: torch.dtype):
         _ = torch.nn.functional.linear(inp, a)
         _ = torch.nn.functional.linear(inp, a_nf4)

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_smoketest_linear_compile(self, dtype: torch.dtype):
         if (
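The CUDA-specific availability check is replaced with PyTorch's device-agnostic accelerator API, so the skip condition now covers any supported GPU backend (CUDA, XPU, MPS, ...) rather than CUDA alone. A minimal sketch of the pattern, assuming a recent PyTorch (2.6+) where `torch.accelerator` is available:

```python
import torch

# torch.accelerator (PyTorch >= 2.6) abstracts over CUDA, XPU, MPS, etc.,
# so a single availability check covers every GPU-like backend.
if torch.accelerator.is_available():
    # current_accelerator() reports the active backend, e.g.
    # device(type='cuda') on NVIDIA or device(type='xpu') on Intel.
    device = torch.accelerator.current_accelerator()
    x = torch.randn(4, 4, device=device)
    print(f"running on {x.device}")
else:
    print("no accelerator found; running on CPU")
```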
22 changes: 11 additions & 11 deletions test/quantization/test_quant_api.py
@@ -643,20 +643,20 @@ def test_module_fqn_to_config_module_name(self):
         assert isinstance(model.linear2.weight, AffineQuantizedTensor)
         assert isinstance(model.linear2.weight._layout, PlainLayout)

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_basic(self):
         config1 = Int4WeightOnlyConfig(
             group_size=32, int4_packing_format="tile_packed_to_4d"
         )
         config = ModuleFqnToConfig({"re:linear.": config1})
-        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
-        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        model = ToyLinearModel().to(_DEVICE).to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device=_DEVICE, dtype=torch.bfloat16)
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
         assert isinstance(model.linear1.weight, Int4TilePackedTo4dTensor)
         assert isinstance(model.linear2.weight, Int4TilePackedTo4dTensor)

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_precedence(self):
         """Testing that full path config takes precedence over
         regex config in ModuleFqnToConfig
@@ -666,14 +666,14 @@ def test_module_fqn_to_config_regex_precedence(self):
         )
         config2 = IntxWeightOnlyConfig()
         config = ModuleFqnToConfig({"linear1": config1, "re:linear.": config2})
-        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
-        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        model = ToyLinearModel().to(_DEVICE).to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device=_DEVICE, dtype=torch.bfloat16)
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
         assert isinstance(model.linear1.weight, Int4TilePackedTo4dTensor)
         assert isinstance(model.linear2.weight, IntxUnpackedToInt8Tensor)

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_precedence2(self):
         """Testing that full path config takes precedence over
         regex config in ModuleFqnToConfig, swapping
@@ -685,14 +685,14 @@
         )
         config2 = IntxWeightOnlyConfig()
         config = ModuleFqnToConfig({"re:linear.": config2, "linear1": config1})
-        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
-        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        model = ToyLinearModel().to(_DEVICE).to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device=_DEVICE, dtype=torch.bfloat16)
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
         assert isinstance(model.linear1.weight, Int4TilePackedTo4dTensor)
         assert isinstance(model.linear2.weight, IntxUnpackedToInt8Tensor)

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_fullmatch(self):
         """Testing that we will only match the fqns that fully
         match the regex
@@ -731,7 +731,7 @@ def example_inputs(self):
"linear3_full_match.bias": None,
}
)
model = M(dtype=torch.bfloat16, device="cuda")
model = M(dtype=torch.bfloat16, device=_DEVICE)
example_inputs = model.example_inputs()
quantize_(model, config, filter_fn=None)
model(*example_inputs)
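Throughout these tests, the hard-coded `"cuda"` device strings give way to a module-level `_DEVICE` constant, whose definition is not shown in this excerpt. A plausible sketch of such a helper (an assumption, not the PR's verbatim definition):

```python
import torch

# Assumed definition of the _DEVICE helper the updated tests rely on:
# use the active accelerator's type string ("cuda", "xpu", ...) when one
# exists, otherwise fall back to "cpu".
_DEVICE = (
    torch.accelerator.current_accelerator().type
    if torch.accelerator.is_available()
    else "cpu"
)

# Usage mirrors the updated tests: the same code path now runs on any backend.
model = torch.nn.Linear(16, 16).to(_DEVICE).to(dtype=torch.bfloat16)
example_input = torch.randn(2, 16, device=_DEVICE, dtype=torch.bfloat16)
_ = model(example_input)
```

Centralizing the device choice in one constant keeps the test bodies backend-neutral, so adding support for a new accelerator needs no further test edits.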
@@ -120,7 +120,7 @@ def from_hp(
         # Validate kernel requirements
         orig_out_features, orig_in_features = hp_tensor.shape[-2:]
         # TODO: relax checks to enable quantizing on other platforms and run on A100
-        if not torch.cuda.get_device_capability()[0] >= 8:
+        if torch.cuda.is_available() and not torch.cuda.get_device_capability()[0] >= 8:
             raise ValueError(
                 f"Cannot use tinygemm int4 kernel with a device of compute capability {torch.cuda.get_device_capability()}, the minimum compute capability is 8.0 for tensor core kernels."
             )
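Before this change, `torch.cuda.get_device_capability()` ran unconditionally and would itself raise on hosts without CUDA (e.g. XPU-only machines); the added `torch.cuda.is_available()` guard short-circuits the expression so the compute-capability requirement is only enforced where it is meaningful. A sketch of the resulting logic, using a hypothetical helper name:

```python
import torch

def _validate_tinygemm_int4_device() -> None:
    # Hypothetical helper mirroring the guarded check above. Compute
    # capability is a CUDA-only concept, so the availability check must
    # come first: short-circuit evaluation keeps non-CUDA backends from
    # ever reaching get_device_capability().
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8:
        raise ValueError(
            f"Cannot use tinygemm int4 kernel with a device of compute "
            f"capability {torch.cuda.get_device_capability()}, the minimum "
            f"compute capability is 8.0 for tensor core kernels."
        )
```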