Skip to content

Commit 3d7a0d6

Browse files
authored
test: add code snippets for using bigframes.ml (#159)
* test: add code snippets for using bigframes.ml
1 parent bf1ec89 commit 3d7a0d6

File tree

3 files changed

+131
-0
lines changed

3 files changed

+131
-0
lines changed
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_clustering_model():
    """Run the KMeans documentation snippet and check it yields results.

    The snippet reads the public penguins table, fits a ten-cluster KMeans
    model, and then calls ``predict`` and ``score`` on the full table. The
    test only verifies that both calls return something other than ``None``.

    NOTE(review): ``fit`` is handed ``bq_df["sex"]`` as a second argument;
    for a clustering model this is presumably ignored -- confirm against the
    bigframes API before relying on it.
    """
    # [START bigquery_dataframes_clustering_model]
    from bigframes.ml.cluster import KMeans
    import bigframes.pandas as bpd

    # Load data from BigQuery
    query_or_table = "bigquery-public-data.ml_datasets.penguins"
    penguins_df = bpd.read_gbq(query_or_table)

    # Create the KMeans model
    kmeans_model = KMeans(n_clusters=10)
    training_input = penguins_df["culmen_length_mm"]
    extra_column = penguins_df["sex"]
    kmeans_model.fit(training_input, extra_column)

    # Predict using the model
    predictions = kmeans_model.predict(penguins_df)
    # Score the model
    model_score = kmeans_model.score(penguins_df)
    # [END bigquery_dataframes_clustering_model]
    assert predictions is not None
    assert model_score is not None

samples/snippets/gen_ai_model_test.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_llm_model():
    """Run the PaLM2 text-generation documentation snippet.

    Builds a ``PaLM2TextGenerator`` on the global session using the
    connection assembled from the constants below, prefixes every value of
    the ``API`` column of a CSV file with a fixed prompt, and asks the model
    for predictions. The test checks that the result column and its first
    element are not ``None``.
    """
    PROJECT_ID = "bigframes-dev"
    REGION = "us"
    CONN_NAME = "bigframes-ml"
    # [START bigquery_dataframes_gen_ai_model]
    from bigframes.ml.llm import PaLM2TextGenerator
    import bigframes.pandas as bpd

    # Create the LLM model
    model = PaLM2TextGenerator(
        session=bpd.get_global_session(),
        connection_name=f"{PROJECT_ID}.{REGION}.{CONN_NAME}",
    )

    api_names = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")

    # Prepare the prompts and send them to the LLM model for prediction
    prompt_prefix = "Generate Pandas sample code for DataFrame."
    prompts = prompt_prefix + api_names["API"]

    # Predict using the model
    prompt_frame = prompts.to_frame()
    df_pred = model.predict(prompt_frame, max_output_tokens=1024)
    # [END bigquery_dataframes_gen_ai_model]
    result_column = "ml_generate_text_llm_result"
    assert df_pred[result_column] is not None
    assert df_pred[result_column].iloc[0] is not None
+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def test_regression_model():
    """Run the linear-regression documentation snippet.

    Restricts the public penguins table to the Adelie species, trains a
    ``LinearRegression`` model on the rows without nulls to predict
    ``body_mass_g``, scores it on the training data, and predicts for the
    rows whose ``body_mass_g`` is null. The test only checks that each
    intermediate object is not ``None``.
    """
    # [START bigquery_dataframes_regression_model]
    from bigframes.ml.linear_model import LinearRegression
    import bigframes.pandas as bpd

    # Load data from BigQuery
    query_or_table = "bigquery-public-data.ml_datasets.penguins"
    bq_df = bpd.read_gbq(query_or_table)

    # Filter the data down to the Adelie Penguin species
    is_adelie = bq_df.species == "Adelie Penguin (Pygoscelis adeliae)"
    adelie_data = bq_df[is_adelie]

    # Drop the species column
    adelie_data = adelie_data.drop(columns=["species"])

    # Drop rows with nulls to get training data
    training_data = adelie_data.dropna()

    # Specify your feature (or input) columns and the label (or output) column:
    feature_names = [
        "island",
        "culmen_length_mm",
        "culmen_depth_mm",
        "flipper_length_mm",
        "sex",
    ]
    feature_columns = training_data[feature_names]
    label_columns = training_data[["body_mass_g"]]

    # Rows with no recorded body mass are the ones to predict for
    missing_mass = adelie_data.body_mass_g.isnull()
    test_data = adelie_data[missing_mass]

    # Create the linear model
    model = LinearRegression()
    model.fit(feature_columns, label_columns)

    # Score the model
    score = model.score(feature_columns, label_columns)

    # Predict using the model
    result = model.predict(test_data)
    # [END bigquery_dataframes_regression_model]
    assert test_data is not None
    assert feature_columns is not None
    assert label_columns is not None
    assert model is not None
    assert score is not None
    assert result is not None

0 commit comments

Comments
 (0)