Created at : 2024-06-23 13:33
Author: Soo.Y
This post walks through implementing an MLP (Multi-Layer Perceptron) with MLX in a Colab environment. The example uses the MNIST dataset.
MLX install
To install MLX in Colab, just run pip install mlx.
MNIST data set
The code to download the MNIST data used in this example is as follows.
import gzip
import os
import pickle
from urllib import request

import numpy as np


def mnist(
    save_dir="/tmp",
    base_url="https://raw.githubusercontent.com/fgnt/mnist/master/",
    filename="mnist.pkl",
):
    def download_and_save(save_file):
        filename = [
            ["training_images", "train-images-idx3-ubyte.gz"],
            ["test_images", "t10k-images-idx3-ubyte.gz"],
            ["training_labels", "train-labels-idx1-ubyte.gz"],
            ["test_labels", "t10k-labels-idx1-ubyte.gz"],
        ]

        mnist = {}
        # Download the four gzip files.
        for name in filename:
            out_file = os.path.join("/tmp", name[1])
            request.urlretrieve(base_url + name[1], out_file)
        # Images: skip the 16-byte header and flatten each image to 784 values.
        for name in filename[:2]:
            out_file = os.path.join("/tmp", name[1])
            with gzip.open(out_file, "rb") as f:
                mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(
                    -1, 28 * 28
                )
        # Labels: skip the 8-byte header.
        for name in filename[-2:]:
            out_file = os.path.join("/tmp", name[1])
            with gzip.open(out_file, "rb") as f:
                mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
        with open(save_file, "wb") as f:
            pickle.dump(mnist, f)

    def preproc(x):
        # Scale pixel values from [0, 255] to [0.0, 1.0].
        return x.astype(np.float32) / 255.0

    save_file = os.path.join(save_dir, filename)
    if not os.path.exists(save_file):
        download_and_save(save_file)
    with open(save_file, "rb") as f:
        mnist = pickle.load(f)

    mnist["training_images"] = preproc(mnist["training_images"])
    mnist["test_images"] = preproc(mnist["test_images"])
    return (
        mnist["training_images"],
        mnist["training_labels"].astype(np.uint32),
        mnist["test_images"],
        mnist["test_labels"].astype(np.uint32),
    )


train_x, train_y, test_x, test_y = mnist()
The training set consists of 60,000 samples and the test set of 10,000 samples.
train_x.shape, train_x
((60000, 784),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32))
train_y.shape, train_y
((60000,), array([5, 0, 4, ..., 5, 6, 8], dtype=uint32))
Building the MLP
Importing modules
import sys
import argparse
import time
from functools import partial
import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim
import numpy as np
MLP class
In MLX, a model is implemented with the magic methods __init__ and __call__. This is similar to defining the forward function in PyTorch. (Strictly speaking, in PyTorch you write forward, but __call__ is already implemented to call forward.)
class MLP(nn.Module):
    def __init__(
        self,
        num_layers: int,  # number of hidden layers
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
    ):
        super().__init__()
        layer_sizes = [input_dim] + [hidden_dim] * num_layers + [output_dim]
        self.layers = [
            nn.Linear(idim, odim)
            for idim, odim in zip(layer_sizes[:-1], layer_sizes[1:])
        ]

    def __call__(self, x):
        # ReLU after every layer except the last; the final layer returns logits.
        for l in self.layers[:-1]:
            x = nn.relu(l(x))
        return self.layers[-1](x)
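The snippets below refer to a model object; here is a minimal sketch of creating one (the layer sizes and class count are illustrative assumptions, not values from the original post):

# Hypothetical hyperparameters for illustration; pick your own.
num_layers, hidden_dim, num_classes = 2, 32, 10

model = MLP(num_layers, train_x.shape[-1], hidden_dim, num_classes)
mx.eval(model.parameters())  # force the lazily-initialized weights to materialize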
Batch function
Next, we implement a function that yields the data one batch at a time. It takes the batch size batch_size, the input data X, and the ground-truth labels y. To shuffle the dataset, np.random.permutation is used to randomly permute the indices. Finally, mx.array converts each batch into MLX's array type.
def batch_iterate(batch_size, X, y, shuffle=True):
    if shuffle:
        perm = np.random.permutation(y.size)
    else:
        perm = np.arange(y.size)
    for s in range(0, y.size, batch_size):
        ids = perm[s : s + batch_size]
        # Convert each NumPy batch to an MLX array.
        yield mx.array(X[ids]), mx.array(y[ids])
To check that the code works, run it as below; X and y then hold the first and second samples.
X, y = next(batch_iterate(2, train_x, train_y, False))
Also, running type(X) shows mlx.core.array.
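As a quick check (the expected output is shown as a comment):

print(type(X))  # <class 'mlx.core.array'>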
Loss function
Pick a loss that fits the problem. In MLX, you can simply use the loss functions it already provides.
def loss_fn(model, X, y):
    # Mean cross-entropy between the model's logits and the integer labels.
    return nn.losses.cross_entropy(model(X), y, reduction="mean")
In MLX, nn.value_and_grad wraps the model and loss into a wrapper function; calling this wrapper computes both the loss value and the gradients.
Note: this nn is not the nn used in PyTorch.
loss_and_grad_fn = nn.value_and_grad(model, loss_fn)
loss, grad = loss_and_grad_fn(model, X, y)
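The returned grad is a nested dict that mirrors the structure of model.parameters(); this is what the verification code further below indexes into. For example:

print(grad['layers'][0]['weight'].shape)  # same shape as that layer's weight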
Loading the optimizer
Select and load the optimizer you want to use; the optimizers built into MLX work out of the box.
Each optimizer has its own set of parameters, but if you set only the common learning rate parameter, the rest fall back to their defaults.
test_lr = 1  # a learning rate of 1 makes the manual verification below trivial
optimizer = optim.SGD(learning_rate=test_lr)
Then pass the model object together with the gradients computed above, and the weights are updated.
optimizer.update(model, grad)
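MLX evaluates computations lazily, so it is common (as in the official MLX MNIST example) to force evaluation after each update:

# Force evaluation of the updated parameters and optimizer state
# (MLX builds the graph lazily until mx.eval is called).
mx.eval(model.parameters(), optimizer.state)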
Computing the gradient update by hand
With the code below, we can compute the update step ourselves and compare it with the result MLX computed, to verify the update.
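The check refers to init_parameters, which is not defined above; presumably it is a snapshot of the parameters taken before optimizer.update was called. A minimal sketch of capturing it, under that assumption:

# Run BEFORE optimizer.update: keep references to the pre-update weights.
# MLX arrays are immutable, so the update does not change these values in place.
init_parameters = model.parameters()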
# SGD step by hand: w_new = w - lr * grad (lr = 1 here)
weight_myself = init_parameters['layers'][1]['weight'] - grad['layers'][1]['weight'] * 1
weight_mlx = model.parameters()['layers'][1]['weight']
print(weight_myself)  # should match weight_mlx
print(weight_mlx)
Validation
Once training is done, the model should be validated on the test data set. Since this example predicts digit labels 0 through 9, use argmax to pick the label with the highest predicted score and compare it with the ground truth.
mlx_test_x = mx.array(test_x)
mlx_test_y = mx.array(test_y)
mx.mean(mx.argmax(model(mlx_test_x), axis=1) == mlx_test_y)
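The expression above returns a scalar MLX array; to get a plain Python number, you can call .item():

accuracy = mx.mean(mx.argmax(model(mlx_test_x), axis=1) == mlx_test_y)
print(accuracy.item())  # accuracy as a Python float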
Full MLP code
Link: where should the code be shared?
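Until the link is filled in, here is a minimal end-to-end sketch that stitches the pieces above together; the epoch count, batch size, learning rate, and layer sizes are illustrative assumptions, not values from the original post:

# Hypothetical hyperparameters for illustration.
num_epochs = 10
batch_size = 256
learning_rate = 0.1

train_x, train_y, test_x, test_y = mnist()

model = MLP(num_layers=2, input_dim=784, hidden_dim=32, output_dim=10)
mx.eval(model.parameters())

optimizer = optim.SGD(learning_rate=learning_rate)
loss_and_grad_fn = nn.value_and_grad(model, loss_fn)

for epoch in range(num_epochs):
    for X, y in batch_iterate(batch_size, train_x, train_y):
        loss, grad = loss_and_grad_fn(model, X, y)
        optimizer.update(model, grad)
        mx.eval(model.parameters(), optimizer.state)
    # Evaluate on the test set after each epoch.
    acc = mx.mean(mx.argmax(model(mx.array(test_x)), axis=1) == mx.array(test_y))
    print(f"Epoch {epoch}: test accuracy {acc.item():.3f}")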