Skip to content

Commit 423c764

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Adding tpu_topology to Vertex SDK
PiperOrigin-RevId: 625144116
1 parent 9c11ea5 commit 423c764

File tree

2 files changed

+74
-0
lines changed

2 files changed

+74
-0
lines changed

google/cloud/aiplatform/models.py

+32
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,7 @@ def deploy(
772772
max_replica_count: int = 1,
773773
accelerator_type: Optional[str] = None,
774774
accelerator_count: Optional[int] = None,
775+
tpu_topology: Optional[str] = None,
775776
service_account: Optional[str] = None,
776777
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
777778
explanation_parameters: Optional[
@@ -833,6 +834,9 @@ def deploy(
833834
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
834835
accelerator_count (int):
835836
Optional. The number of accelerators to attach to a worker replica.
837+
tpu_topology (str):
838+
Optional. The TPU topology to use for the DeployedModel.
839+
Required for CloudTPU multihost deployments.
836840
service_account (str):
837841
The service account that the DeployedModel's container runs as. Specify the
838842
email address of the service account. If this service account is not
@@ -896,6 +900,7 @@ def deploy(
896900
max_replica_count=max_replica_count,
897901
accelerator_type=accelerator_type,
898902
accelerator_count=accelerator_count,
903+
tpu_topology=tpu_topology,
899904
service_account=service_account,
900905
explanation_spec=explanation_spec,
901906
metadata=metadata,
@@ -919,6 +924,7 @@ def _deploy(
919924
max_replica_count: int = 1,
920925
accelerator_type: Optional[str] = None,
921926
accelerator_count: Optional[int] = None,
927+
tpu_topology: Optional[str] = None,
922928
service_account: Optional[str] = None,
923929
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
924930
metadata: Optional[Sequence[Tuple[str, str]]] = (),
@@ -977,6 +983,9 @@ def _deploy(
977983
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
978984
accelerator_count (int):
979985
Optional. The number of accelerators to attach to a worker replica.
986+
tpu_topology (str):
987+
Optional. The TPU topology to use for the DeployedModel.
988+
Required for CloudTPU multihost deployments.
980989
service_account (str):
981990
The service account that the DeployedModel's container runs as. Specify the
982991
email address of the service account. If this service account is not
@@ -1026,6 +1035,7 @@ def _deploy(
10261035
max_replica_count=max_replica_count,
10271036
accelerator_type=accelerator_type,
10281037
accelerator_count=accelerator_count,
1038+
tpu_topology=tpu_topology,
10291039
service_account=service_account,
10301040
explanation_spec=explanation_spec,
10311041
metadata=metadata,
@@ -1056,6 +1066,7 @@ def _deploy_call(
10561066
max_replica_count: int = 1,
10571067
accelerator_type: Optional[str] = None,
10581068
accelerator_count: Optional[int] = None,
1069+
tpu_topology: Optional[str] = None,
10591070
service_account: Optional[str] = None,
10601071
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
10611072
metadata: Optional[Sequence[Tuple[str, str]]] = (),
@@ -1123,6 +1134,9 @@ def _deploy_call(
11231134
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
11241135
accelerator_count (int):
11251136
Optional. The number of accelerators to attach to a worker replica.
1137+
tpu_topology (str):
1138+
Optional. The TPU topology to use for the DeployedModel.
1139+
Required for CloudTPU multihost deployments.
11261140
service_account (str):
11271141
The service account that the DeployedModel's container runs as. Specify the
11281142
email address of the service account. If this service account is not
@@ -1250,6 +1264,9 @@ def _deploy_call(
12501264
[autoscaling_metric_spec]
12511265
)
12521266

1267+
if tpu_topology is not None:
1268+
machine_spec.tpu_topology = tpu_topology
1269+
12531270
dedicated_resources.machine_spec = machine_spec
12541271
deployed_model.dedicated_resources = dedicated_resources
12551272

@@ -2440,6 +2457,7 @@ def deploy(
24402457
max_replica_count: int = 1,
24412458
accelerator_type: Optional[str] = None,
24422459
accelerator_count: Optional[int] = None,
2460+
tpu_topology: Optional[str] = None,
24432461
service_account: Optional[str] = None,
24442462
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
24452463
explanation_parameters: Optional[
@@ -2487,6 +2505,9 @@ def deploy(
24872505
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
24882506
accelerator_count (int):
24892507
Optional. The number of accelerators to attach to a worker replica.
2508+
tpu_topology (str):
2509+
Optional. The TPU topology to use for the DeployedModel.
2510+
Required for CloudTPU multihost deployments.
24902511
service_account (str):
24912512
The service account that the DeployedModel's container runs as. Specify the
24922513
email address of the service account. If this service account is not
@@ -2534,6 +2555,7 @@ def deploy(
25342555
max_replica_count=max_replica_count,
25352556
accelerator_type=accelerator_type,
25362557
accelerator_count=accelerator_count,
2558+
tpu_topology=tpu_topology,
25372559
service_account=service_account,
25382560
explanation_spec=explanation_spec,
25392561
metadata=metadata,
@@ -3442,6 +3464,7 @@ def deploy(
34423464
max_replica_count: int = 1,
34433465
accelerator_type: Optional[str] = None,
34443466
accelerator_count: Optional[int] = None,
3467+
tpu_topology: Optional[str] = None,
34453468
service_account: Optional[str] = None,
34463469
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
34473470
explanation_parameters: Optional[
@@ -3505,6 +3528,9 @@ def deploy(
35053528
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
35063529
accelerator_count (int):
35073530
Optional. The number of accelerators to attach to a worker replica.
3531+
tpu_topology (str):
3532+
Optional. The TPU topology to use for the DeployedModel.
3533+
Required for CloudTPU multihost deployments.
35083534
service_account (str):
35093535
The service account that the DeployedModel's container runs as. Specify the
35103536
email address of the service account. If this service account is not
@@ -3601,6 +3627,7 @@ def deploy(
36013627
max_replica_count=max_replica_count,
36023628
accelerator_type=accelerator_type,
36033629
accelerator_count=accelerator_count,
3630+
tpu_topology=tpu_topology,
36043631
service_account=service_account,
36053632
explanation_spec=explanation_spec,
36063633
metadata=metadata,
@@ -3627,6 +3654,7 @@ def _deploy(
36273654
max_replica_count: int = 1,
36283655
accelerator_type: Optional[str] = None,
36293656
accelerator_count: Optional[int] = None,
3657+
tpu_topology: Optional[str] = None,
36303658
service_account: Optional[str] = None,
36313659
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
36323660
metadata: Optional[Sequence[Tuple[str, str]]] = (),
@@ -3687,6 +3715,9 @@ def _deploy(
36873715
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
36883716
accelerator_count (int):
36893717
Optional. The number of accelerators to attach to a worker replica.
3718+
tpu_topology (str):
3719+
Optional. The TPU topology to use for the DeployedModel.
3720+
Required for CloudTPU multihost deployments.
36903721
service_account (str):
36913722
The service account that the DeployedModel's container runs as. Specify the
36923723
email address of the service account. If this service account is not
@@ -3777,6 +3808,7 @@ def _deploy(
37773808
max_replica_count=max_replica_count,
37783809
accelerator_type=accelerator_type,
37793810
accelerator_count=accelerator_count,
3811+
tpu_topology=tpu_topology,
37803812
service_account=service_account,
37813813
explanation_spec=explanation_spec,
37823814
metadata=metadata,

tests/unit/aiplatform/test_models.py

+42
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@
132132
_TEST_STARTING_REPLICA_COUNT = 2
133133
_TEST_MAX_REPLICA_COUNT = 12
134134

135+
_TEST_TPU_MACHINE_TYPE = "ct5lp-hightpu-4t"
136+
_TEST_TPU_TOPOLOGY = "2x2"
137+
135138
_TEST_BATCH_SIZE = 16
136139

137140
_TEST_PIPELINE_RESOURCE_NAME = (
@@ -2077,6 +2080,45 @@ def test_deploy_no_endpoint_dedicated_resources(self, deploy_model_mock, sync):
20772080
timeout=None,
20782081
)
20792082

2083+
@pytest.mark.usefixtures(
2084+
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
2085+
)
2086+
@pytest.mark.parametrize("sync", [True, False])
2087+
def test_deploy_no_endpoint_with_tpu_topology(self, deploy_model_mock, sync):
2088+
test_model = models.Model(_TEST_ID)
2089+
test_model._gca_resource.supported_deployment_resources_types.append(
2090+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
2091+
)
2092+
test_endpoint = test_model.deploy(
2093+
machine_type=_TEST_TPU_MACHINE_TYPE,
2094+
tpu_topology=_TEST_TPU_TOPOLOGY,
2095+
sync=sync,
2096+
deploy_request_timeout=None,
2097+
)
2098+
2099+
if not sync:
2100+
test_endpoint.wait()
2101+
2102+
expected_machine_spec = gca_machine_resources.MachineSpec(
2103+
machine_type=_TEST_TPU_MACHINE_TYPE,
2104+
tpu_topology=_TEST_TPU_TOPOLOGY,
2105+
)
2106+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2107+
machine_spec=expected_machine_spec, min_replica_count=1, max_replica_count=1
2108+
)
2109+
expected_deployed_model = gca_endpoint.DeployedModel(
2110+
dedicated_resources=expected_dedicated_resources,
2111+
model=test_model.resource_name,
2112+
display_name=None,
2113+
)
2114+
deploy_model_mock.assert_called_once_with(
2115+
endpoint=test_endpoint.resource_name,
2116+
deployed_model=expected_deployed_model,
2117+
traffic_split={"0": 100},
2118+
metadata=(),
2119+
timeout=None,
2120+
)
2121+
20802122
@pytest.mark.usefixtures(
20812123
"get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
20822124
)

0 commit comments

Comments (0)