-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathself_load_data.py
95 lines (75 loc) · 3.15 KB
/
self_load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
"""
date: 2021/3/16 11:11 上午
written by: neonleexiang
"""
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""CIFAR10 small images classification dataset.
"""
from tarfile import open
import os
import numpy as np
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.datasets.cifar import load_batch
from tensorflow.python.keras.utils.data_utils import get_file
# from tensorflow.python.util.tf_export import keras_export
# @keras_export('keras.datasets.cifar10.load_data')
def load_data(filename):
"""Loads [CIFAR10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).
This is a dataset of 50,000 32x32 color training images and 10,000 test
images, labeled over 10 categories. See more info at the
[CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).
Returns:
Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
**x_train, x_test**: uint8 arrays of RGB image data with shape
`(num_samples, 3, 32, 32)` if `tf.keras.backend.image_data_format()` is
`'channels_first'`, or `(num_samples, 32, 32, 3)` if the data format
is `'channels_last'`.
**y_train, y_test**: uint8 arrays of category labels
(integers in range 0-9) each with shape (num_samples, 1).
"""
dirname = 'cifar-10-batches-py'
origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
if filename != None:
t = open(filename)
t.extractall(path='./')
path = dirname
else:
path = get_file(
dirname,
origin=origin,
untar=True,
file_hash=
'6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce')
num_train_samples = 50000
x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
y_train = np.empty((num_train_samples,), dtype='uint8')
for i in range(1, 6):
fpath = os.path.join(path, 'data_batch_' + str(i))
(x_train[(i - 1) * 10000:i * 10000, :, :, :],
y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath)
fpath = os.path.join(path, 'test_batch')
x_test, y_test = load_batch(fpath)
y_train = np.reshape(y_train, (len(y_train), 1))
y_test = np.reshape(y_test, (len(y_test), 1))
if K.image_data_format() == 'channels_last':
x_train = x_train.transpose(0, 2, 3, 1)
x_test = x_test.transpose(0, 2, 3, 1)
x_test = x_test.astype(x_train.dtype)
y_test = y_test.astype(y_train.dtype)
return (x_train, y_train), (x_test, y_test)