Skip to content

Commit 91b7f6a

Browse files
authored
Feat: Onboard Fashion MNIST dataset (#387)
1 parent 98e0179 commit 91b7f6a

File tree

6 files changed

+210
-0
lines changed

6 files changed

+210
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/**
2+
* Copyright 2021 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
18+
# Cloud Storage bucket for the Fashion MNIST dataset. Its name is the
# caller-supplied prefix followed by "-fashion-mnist".
resource "google_storage_bucket" "fashion-mnist" {
  name     = "${var.bucket_name_prefix}-fashion-mnist"
  location = "US"

  uniform_bucket_level_access = true
  force_destroy               = true

  # Differences in the `logging` attribute are ignored when planning.
  lifecycle {
    ignore_changes = [logging]
  }
}
29+
30+
# Publishes the name of the Fashion MNIST bucket as a Terraform output.
output "storage_bucket-fashion-mnist-name" {
  value = google_storage_bucket.fashion-mnist.name
}
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* Copyright 2021 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
18+
# Google provider configuration. Project, region and the service account to
# impersonate are all taken from input variables.
provider "google" {
  project = var.project_id
  region  = var.region

  impersonate_service_account = var.impersonating_acct
}
23+
24+
# Looks up the OpenID user info of the identity the provider is running as.
data "google_client_openid_userinfo" "me" {}

# Publishes the email address returned by that lookup.
output "impersonating-account" {
  value = data.google_client_openid_userinfo.me.email
}
+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/**
2+
* Copyright 2021 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
18+
# Input variables without a default: a value must be supplied for each.

# Consumed by the `google` provider configuration.
variable "project_id" {}
variable "impersonating_acct" {}
variable "region" {}

# Prefix prepended to the storage bucket name.
variable "bucket_name_prefix" {}

variable "env" {}

# Optional; defaults to an empty map when not supplied.
variable "iam_policies" {
  default = {}
}
26+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright 2022 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Dataset-level metadata. The descriptive fields are intentionally left null.
dataset:
  name: fashion_mnist
  friendly_name: fashion_mnist
  description: null
  dataset_sources: null
  terms_of_use: null

# Cloud resources declared for this dataset.
resources:
  - type: storage_bucket
    name: fashion-mnist
    uniform_bucket_level_access: true
    location: US
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from airflow import DAG
17+
from airflow.operators import bash
18+
19+
# Arguments passed to the DAG as `default_args`.
default_args = {
    "owner": "Google",
    "depends_on_past": False,
    "start_date": "2022-06-10",
}


with DAG(
    dag_id="fashion_mnist.fashion_mnist",
    default_args=default_args,
    max_active_runs=1,
    schedule_interval="@weekly",
    catchup=False,
    default_view="graph",
) as dag:

    # Task to copy `fashion-mnist.gz` from FASHION MNIST Database to GCS.
    #
    # Downloads the four Fashion MNIST archives (train/test images and labels)
    # with curl into "$data_dir/fashion-mnist".
    #
    # `set -e` together with `curl --fail` makes the task fail as soon as any
    # single download fails; without them only the exit status of the last
    # command decides the task outcome and HTTP error pages are saved as if
    # they were archives.
    #
    # NOTE(review): `data_dir` already ends in `fashion-mnist`, so files land
    # in `.../fashion-mnist/fashion-mnist/` -- confirm this nesting is intended.
    download_fashion_mnist_zip_files = bash.BashOperator(
        task_id="download_fashion_mnist_zip_files",
        bash_command=(
            "set -e\n"
            'mkdir -p "$data_dir/fashion-mnist"\n'
            'curl --fail -L -o "$data_dir/fashion-mnist/t10k-images-idx3-ubyte.gz" "$fashion_mnist_test"\n'
            'curl --fail -L -o "$data_dir/fashion-mnist/train-images-idx3-ubyte.gz" "$fashion_mnist_train"\n'
            'curl --fail -L -o "$data_dir/fashion-mnist/train-labels-idx1-ubyte.gz" "$fashion_mnist_train_labels"\n'
            'curl --fail -L -o "$data_dir/fashion-mnist/t10k-labels-idx1-ubyte.gz" "$fashion_mnist_test_labels"\n'
        ),
        env={
            "data_dir": "/home/airflow/gcs/data/fashion-mnist",
            "fashion_mnist_test": "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz",
            "fashion_mnist_train": "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz",
            "fashion_mnist_train_labels": "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz",
            "fashion_mnist_test_labels": "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz",
        },
    )

    # Single-task graph: the task has no upstream or downstream dependencies.
    download_fashion_mnist_zip_files
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2022 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
---
# No additional cloud resources are declared for this pipeline.
resources: null

dag:
  airflow_version: 2
  initialize:
    dag_id: fashion_mnist
    default_args:
      owner: "Google"
      depends_on_past: false
      start_date: "2022-06-10"
    max_active_runs: 1
    schedule_interval: "@weekly"
    catchup: false
    default_view: graph

  tasks:
    - operator: BashOperator
      description: "Task to copy `fashion-mnist.gz` from FASHION MNIST Database to GCS"
      args:
        task_id: "download_fashion_mnist_zip_files"
        # `set -e` plus `curl --fail` abort the task on the first failed
        # download; otherwise only the last command's exit status counts and
        # HTTP error pages would be saved as if they were archives.
        # NOTE(review): data_dir already ends in `fashion-mnist`, so files land
        # in `.../fashion-mnist/fashion-mnist/` -- confirm this is intended.
        bash_command: |
          set -e
          mkdir -p "$data_dir/fashion-mnist"
          curl --fail -L -o "$data_dir/fashion-mnist/t10k-images-idx3-ubyte.gz" "$fashion_mnist_test"
          curl --fail -L -o "$data_dir/fashion-mnist/train-images-idx3-ubyte.gz" "$fashion_mnist_train"
          curl --fail -L -o "$data_dir/fashion-mnist/train-labels-idx1-ubyte.gz" "$fashion_mnist_train_labels"
          curl --fail -L -o "$data_dir/fashion-mnist/t10k-labels-idx1-ubyte.gz" "$fashion_mnist_test_labels"
        env:
          data_dir: /home/airflow/gcs/data/fashion-mnist
          fashion_mnist_test: "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz"
          fashion_mnist_train: "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz"
          fashion_mnist_train_labels: "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz"
          fashion_mnist_test_labels: "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz"

  graph_paths:
    - "download_fashion_mnist_zip_files"

0 commit comments

Comments
 (0)