1. Define reinforcement learning (Reinforcement Learning) and explain how it differs from supervised learning (Supervised Learning), including the relevant formulas.
Reinforcement learning is goal-directed learning from interaction with an environment.
$$\text{Deep Learning} \subset \text{Machine Learning} \subset \text{Artificial Intelligence}$$
Artificial intelligence broadly refers to the technology of building machines that think and act like humans.
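As a sketch of the difference in objectives (the notation here is illustrative, not from the original notes): supervised learning fits a function to labeled input-output pairs, while reinforcement learning searches for a policy that maximizes the expected discounted return collected through interaction.

$$\text{SL:}\quad \min_{\theta}\; \mathbb{E}_{(x,y)\sim\mathcal{D}}\big[\,\ell(f_\theta(x),\, y)\,\big] \qquad\qquad \text{RL:}\quad \max_{\pi}\; \mathbb{E}_{\pi}\Big[\sum_{t=0}^{\infty} \gamma^{t} r_t\Big]$$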
```python
import random
import numpy as np
from visualize_train import draw_value_image, draw_policy_image

# Action offsets in (row, col) form: left, right, up, down
ACTIONS = [np.array([0, -1]),
           np.array([0, 1]),
           np.array([-1, 0]),
           np.array([1, 0])]
```
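Continuing the snippet above, a minimal sketch of how iterative policy evaluation could use `ACTIONS` (the world size, constant step reward, and convergence threshold are assumptions for illustration, not from the original file):

```python
def evaluate_policy_iterative(world_size=5, gamma=0.9, theta=1e-4):
    """Evaluate a uniform random policy with repeated Bellman backups."""
    values = np.zeros((world_size, world_size))
    while True:
        new_values = np.zeros_like(values)
        for i in range(world_size):
            for j in range(world_size):
                for action in ACTIONS:
                    # Moves that would leave the grid keep the agent in place.
                    ni, nj = np.clip(np.array([i, j]) + action, 0, world_size - 1)
                    reward = -1.0  # assumed constant step cost
                    new_values[i, j] += 0.25 * (reward + gamma * values[ni, nj])
        if np.abs(new_values - values).max() < theta:
            return new_values
        values = new_values
```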
```python
import numpy as np
from numpy.linalg import inv
from visualize_train import draw_value_image, draw_policy_image

# Action offsets in (row, col) form: left, right, up, down
ACTIONS = [np.array([0, -1]),
           np.array([0, 1]),
           np.array([-1, 0]),
           np.array([1, 0])]
```
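The `inv` import suggests solving the Bellman evaluation equation in closed form rather than by iteration: for a fixed policy, $v = r + \gamma P v$ gives $v = (I - \gamma P)^{-1} r$. A minimal sketch continuing the snippet above (the names `P`, `r`, and `gamma` are illustrative assumptions):

```python
def evaluate_policy_direct(P, r, gamma=0.9):
    """Solve v = r + gamma * P @ v exactly: v = (I - gamma * P)^{-1} r.

    P: (S, S) state-transition matrix under the fixed policy.
    r: (S,) expected immediate reward for each state.
    """
    S = P.shape[0]
    return inv(np.eye(S) - gamma * P) @ r
```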
"""Showcase of flying arrows that can stick to objects in a somewhat | |
realistic looking way. | |
""" | |
import sys | |
from typing import List | |
import pygame | |
import pymunk |
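A minimal sketch of the kind of physics setup this showcase relies on (the vertex list, mass, and friction are illustrative assumptions; the real demo also handles flight drag and makes arrows stick via collision callbacks):

```python
def make_arrow(space: pymunk.Space) -> pymunk.Body:
    """Add a simple dart-shaped dynamic body to a pymunk space."""
    mass = 1.0
    vertices = [(-30, 0), (0, 3), (10, 0), (0, -3)]  # rough arrow outline
    moment = pymunk.moment_for_poly(mass, vertices)
    body = pymunk.Body(mass, moment)
    shape = pymunk.Poly(body, vertices)
    shape.friction = 0.5  # assumed value so arrows do not slide freely
    space.add(body, shape)
    return body
```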
```python
import torch

directs = [(1, 0), (-1, 0), (0, 1), (0, -1)]  # [down, up, right, left]
inf = int(1e9)  # large constant used as "infinity" in later value updates

def initialize_policy(width, height, terminals):
    policy = torch.full((height, width, 4), 0.0)
    for y in range(height):
        for x in range(width):
            # Assumed completion: uniform random policy (1/4 per action)
            # everywhere except terminal states, which keep probability 0.
            if (y, x) not in terminals:
                policy[y, x] = 0.25
    return policy
```
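For example (shape check only; the grid size and terminal coordinates are made up):

```python
policy = initialize_policy(width=5, height=5, terminals=[(0, 0), (4, 4)])
print(policy.shape)  # torch.Size([5, 5, 4])
```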
| Term | English | Description | Notation |
|---|---|---|---|
| Random variable | Random Variable, Stochastic Variable | A variable whose measured value varies according to a given probability. | X |
| Probability distribution | Probability Distribution | A function describing the probability that a random variable takes each particular value. | |
| Expected value | Expected Value | The average of the values one can expect to obtain if an event with given probabilities were repeated infinitely many times; computed with the probability mass function (PMF) for discrete distributions and the probability density function (PDF) for continuous ones. | |
| Mean | Mean | In probability and statistics, the expected value is also called the (population) mean (Population Mean). | |
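For reference, the expected value written with a PMF and with a PDF, respectively:

$$\mathbb{E}[X] = \sum_{x} x\, p(x) \quad \text{(discrete)}, \qquad \mathbb{E}[X] = \int_{-\infty}^{\infty} x\, f(x)\, dx \quad \text{(continuous)}$$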