Skip to content

Commit 4026997

Browse files
authored
docs: pandas DataFrame samples are more standalone (#224)
* docs: pandas DataFrame samples are more standalone * fix region tag * fix region tag * remove unused imports * blacken * remove session from call to rows/to_dataframe
1 parent 7b086ba commit 4026997

10 files changed

+248
-13
lines changed

samples/conftest.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import pytest
18+
19+
20+
@pytest.fixture(scope="session")
def project_id():
    """Provide the Google Cloud project ID shared by the sample tests.

    The value is read from the ``GOOGLE_CLOUD_PROJECT`` environment variable;
    a ``KeyError`` propagates if that variable is not set.
    """
    environment = os.environ
    return environment["GOOGLE_CLOUD_PROJECT"]

samples/quickstart/quickstart_test.py

-8
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
# limitations under the License.
1414

1515
import datetime
16-
import os
17-
18-
import pytest
1916

2017
from . import quickstart
2118

@@ -27,11 +24,6 @@ def now_millis():
2724
)
2825

2926

30-
@pytest.fixture()
31-
def project_id():
32-
return os.environ["GOOGLE_CLOUD_PROJECT"]
33-
34-
3527
def test_quickstart_wo_snapshot(capsys, project_id):
3628
quickstart.main(project_id)
3729
out, _ = capsys.readouterr()

samples/to_dataframe/noxfile.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def py(session: nox.sessions.Session) -> None:
226226

227227

228228
def _get_repo_root() -> Optional[str]:
229-
""" Returns the root folder of the project. """
229+
"""Returns the root folder of the project."""
230230
# Get root of this repository. Assume we don't have directories nested deeper than 10 items.
231231
p = Path(os.getcwd())
232232
for i in range(10):
+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def read_query_results():
    """Run a query on a public dataset and load the results into pandas.

    Queries the Stack Overflow public dataset for questions whose tags
    contain ``google-bigquery``, ordered by view count (highest first),
    downloads the result set as a DataFrame, and prints its first rows.

    Returns:
        The DataFrame produced by ``to_dataframe()``.
    """
    # [START bigquerystorage_pandas_tutorial_read_query_results]
    from google.cloud import bigquery

    bqclient = bigquery.Client()

    # Download query results.
    query_string = """
    SELECT
    CONCAT(
        'https://stackoverflow.com/questions/',
        CAST(id as STRING)) as url,
    view_count
    FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE tags like '%google-bigquery%'
    ORDER BY view_count DESC
    """

    dataframe = (
        bqclient.query(query_string)
        .result()
        .to_dataframe(
            # Optionally, explicitly request to use the BigQuery Storage API. As of
            # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage
            # API is used by default.
            create_bqstorage_client=True,
        )
    )
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_query_results]

    return dataframe
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from . import read_query_results
16+
17+
18+
def test_read_query_results(capsys):
    """Run the query sample and check that "stackoverflow" was printed."""
    read_query_results.read_query_results()
    captured = capsys.readouterr()
    assert "stackoverflow" in captured.out
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def read_table():
    """Download two columns of a public table into a pandas DataFrame.

    Lists rows of ``bigquery-public-data.utility_us.country_code_iso``
    restricted to the ``country_name`` and ``fips_code`` columns, converts
    them to a DataFrame, and prints its first rows.

    Returns:
        The DataFrame produced by ``to_dataframe()``.
    """
    # [START bigquerystorage_pandas_tutorial_read_table]
    from google.cloud import bigquery

    bqclient = bigquery.Client()

    # Download a table.
    table = bigquery.TableReference.from_string(
        "bigquery-public-data.utility_us.country_code_iso"
    )
    selected_fields = [
        bigquery.SchemaField("country_name", "STRING"),
        bigquery.SchemaField("fips_code", "STRING"),
    ]
    rows = bqclient.list_rows(table, selected_fields=selected_fields)

    # Optionally, explicitly request to use the BigQuery Storage API. As of
    # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage
    # API is used by default.
    dataframe = rows.to_dataframe(create_bqstorage_client=True)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_table]

    return dataframe
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from . import read_table_bigquery
16+
17+
18+
def test_read_table(capsys):
    """Run the table-download sample and check the expected column was printed."""
    read_table_bigquery.read_table()
    captured = capsys.readouterr()
    assert "country_name" in captured.out
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def read_table(your_project_id):
    """Read a public table through the BigQuery Storage API into pandas.

    Creates an Arrow-format read session over
    ``bigquery-public-data.new_york_trees.tree_species`` limited to two
    columns, reads the first stream, concatenates the pages into a single
    DataFrame, and prints its first rows.

    Args:
        your_project_id: Project ID used to build the ``projects/...`` parent
            under which the read session is created.

    Returns:
        The concatenated DataFrame.
    """
    # Keep the caller's value: the tagged placeholder assignment below
    # overwrites it, and it is restored immediately afterwards.
    caller_project_id = your_project_id
    # [START bigquerystorage_pandas_tutorial_read_session]
    your_project_id = "project-for-read-session"
    # [END bigquerystorage_pandas_tutorial_read_session]
    your_project_id = caller_project_id

    # [START bigquerystorage_pandas_tutorial_read_session]
    from google.cloud import bigquery_storage
    from google.cloud.bigquery_storage import types
    import pandas

    bqstorageclient = bigquery_storage.BigQueryReadClient()

    project_id = "bigquery-public-data"
    dataset_id = "new_york_trees"
    table_id = "tree_species"
    table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"

    # Select columns to read with read options. If no read options are
    # specified, the whole table is read.
    columns = ["species_common_name", "fall_color"]
    read_options = types.ReadSession.TableReadOptions(selected_fields=columns)

    parent = f"projects/{your_project_id}"

    # Avro is also supported, but the Arrow data format is optimized to
    # work well with column-oriented data structures such as pandas
    # DataFrames.
    requested_session = types.ReadSession(
        table=table,
        data_format=types.DataFormat.ARROW,
        read_options=read_options,
    )
    read_session = bqstorageclient.create_read_session(
        parent=parent,
        read_session=requested_session,
        max_stream_count=1,
    )

    # This example reads from only a single stream. Read from multiple streams
    # to fetch data faster. Note that the session may not contain any streams
    # if there are no rows to read.
    stream = read_session.streams[0]
    reader = bqstorageclient.read_rows(stream.name)

    # Parse all Arrow blocks and create a dataframe.
    frames = [page.to_dataframe() for page in reader.rows().pages]
    dataframe = pandas.concat(frames)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_session]

    return dataframe
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from . import read_table_bqstorage
16+
17+
18+
def test_read_table(capsys, project_id):
    """Run the Storage API sample and check the expected column was printed."""
    read_table_bqstorage.read_table(your_project_id=project_id)
    captured = capsys.readouterr()
    assert "species_common_name" in captured.out

samples/to_dataframe/requirements.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ google-auth==1.32.1
22
google-cloud-bigquery-storage==2.6.0
33
google-cloud-bigquery==2.20.0
44
pyarrow==4.0.1
5-
ipython==7.10.2; python_version > '3.0'
6-
ipython==5.9.0; python_version < '3.0'
7-
pandas==0.25.3; python_version > '3.0'
8-
pandas==0.24.2; python_version < '3.0'
5+
ipython==7.24.0; python_version > '3.6'
6+
ipython==7.16.1; python_version <= '3.6'
7+
pandas==1.2.5; python_version > '3.6'
8+
pandas==1.1.5; python_version <= '3.6'

0 commit comments

Comments
 (0)