# Copyright (c) 2021 Xiao-Yong Jin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import torch
import math
import sys
import os
from timeit import default_timer as timer
from functools import reduce
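# Run parameters for the 2D U(1) pure-gauge simulation: lattice shape, coupling
# beta, HMC trajectory length tau and leapfrog step count, plus threading knobs.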
class Param:
    def __init__(self, beta = 6.0, lat = [64, 64], tau = 2.0, nstep = 50, ntraj = 256, nrun = 4, nprint = 256, seed = 11*13, randinit = False, nth = int(os.environ.get('OMP_NUM_THREADS', '2')), nth_interop = 2):
        self.beta = beta
        self.lat = lat
        self.nd = len(lat)
        self.volume = reduce(lambda x,y:x*y, lat)
        self.tau = tau
        self.nstep = nstep
        self.dt = self.tau / self.nstep
        self.ntraj = ntraj
        self.nrun = nrun
        self.nprint = nprint
        self.seed = seed
        self.randinit = randinit
        self.nth = nth
        self.nth_interop = nth_interop
    def initializer(self):
        size = (self.nd,) + tuple(self.lat)  # lat may be given as a list or a tuple
        if self.randinit:
            return torch.empty(size).uniform_(-math.pi, math.pi)
        else:
            return torch.zeros(size)
    def summary(self):
        return f"""latsize = {self.lat}
volume = {self.volume}
beta = {self.beta}
trajs = {self.ntraj}
tau = {self.tau}
steps = {self.nstep}
seed = {self.seed}
nth = {self.nth}
nth_interop = {self.nth_interop}
"""
    def uniquestr(self):
        lat = ".".join(str(x) for x in self.lat)
        return f"out_l{lat}_b{self.beta}_n{self.ntraj}_t{self.tau}_s{self.nstep}.out"
def action(param, f):
    return (-param.beta)*torch.sum(torch.cos(plaqphase(f)))
def force(param, f):
    f.requires_grad_(True)
    s = action(param, f)
    s.backward()
    ff = f.grad
    f.requires_grad_(False)
    return ff
plaqphase = lambda f: f[0,:] - f[1,:] - torch.roll(f[0,:], shifts=-1, dims=1) + torch.roll(f[1,:], shifts=-1, dims=0)
topocharge = lambda f: torch.floor(0.1 + torch.sum(regularize(plaqphase(f))) / (2*math.pi))
def regularize(f):
    p2 = 2*math.pi
    f_ = (f - math.pi) / p2
    return p2*(f_ - torch.floor(f_) - 0.5)
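# Leapfrog integrator in the position-Verlet form: a half step in x, alternating
# full steps in p and x, and a closing half step in x.  The update is reversible
# and area-preserving, which keeps the HMC accept/reject step exact.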
def leapfrog(param, x, p):
    dt = param.dt
    x_ = x + 0.5*dt*p
    p_ = p + (-dt)*force(param, x_)
    for i in range(param.nstep-1):
        x_ = x_ + dt*p_
        p_ = p_ + (-dt)*force(param, x_)
    x_ = x_ + 0.5*dt*p_
    return (x_, p_)
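# One HMC trajectory: sample Gaussian momenta, integrate with leapfrog,
# regularize the link angles back into [-pi, pi), and accept with Metropolis
# probability min(1, exp(-dH)), where dH is the change of H = S + p^2/2.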
def hmc(param, x):
    p = torch.randn_like(x)
    act0 = action(param, x) + 0.5*torch.sum(p*p)
    x_, p_ = leapfrog(param, x, p)
    xr = regularize(x_)
    act = action(param, xr) + 0.5*torch.sum(p_*p_)
    prob = torch.rand([], dtype=torch.float64)
    dH = act-act0
    exp_mdH = torch.exp(-dH)
    acc = prob < exp_mdH
    newx = xr if acc else x
    return (dH, exp_mdH, acc, newx)
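# Illustrative use of hmc() above (the run() driver below does exactly this):
# starting from param.initializer(), each call hmc(param, field) returns
# (dH, exp(-dH), accept_flag, new_field) for one trajectory.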
put = lambda s: sys.stdout.write(s)

# ------ BEGIN CODE FROM https://arxiv.org/abs/2101.08176
# Introduction to Normalizing Flows for Lattice Field Theory
# Michael S. Albergo, Denis Boyda, Daniel C. Hackett, Gurtej Kanwar, Kyle Cranmer, Sébastien Racanière, Danilo Jimenez Rezende, Phiala E. Shanahan
# License: CC BY 4.0
# With slight modifications by Xiao-Yong Jin to reduce global variables
import numpy as np
import packaging.version
torch_device = 'cpu'
float_dtype = np.float64
def torch_mod(x):
    return torch.remainder(x, 2*np.pi)
def torch_wrap(x):
    return torch_mod(x+np.pi) - np.pi
def grab(var):
    return var.detach().cpu().numpy()
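# Effective sample size per configuration for importance weights w_i = p_i/q_i:
# ESS/N = (sum_i w_i)^2 / (N * sum_i w_i^2), evaluated stably in log space.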
def compute_ess(logp, logq):
    logw = logp - logq
    log_ess = 2*torch.logsumexp(logw, dim=0) - torch.logsumexp(2*logw, dim=0)
    ess_per_cfg = torch.exp(log_ess) / len(logw)
    return ess_per_cfg
def bootstrap(x, *, Nboot, binsize):
    boots = []
    x = x.reshape(-1, binsize, *x.shape[1:])
    for i in range(Nboot):
        boots.append(np.mean(x[np.random.randint(len(x), size=len(x))], axis=(0,1)))
    return np.mean(boots), np.std(boots)
def print_metrics(history, avg_last_N_epochs, era, epoch):
    print(f'== Era {era} | Epoch {epoch} metrics ==')
    for key, val in history.items():
        avgd = np.mean(val[-avg_last_N_epochs:])
        print(f'\t{key} {avgd:g}')
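# Draw proposals from the trained flow in batches of `batch_size` and yield
# them one at a time, together with their model log-density logq and target
# log-density logp = -S(x).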
def serial_sample_generator(model, action, batch_size, N_samples):
    layers, prior = model['layers'], model['prior']
    layers.eval()
    x, logq, logp = None, None, None
    for i in range(N_samples):
        batch_i = i % batch_size
        if batch_i == 0:
            # we're out of samples to propose, generate a new batch
            x, logq = apply_flow_to_prior(prior, layers, batch_size=batch_size)
            logp = -action(x)
        yield x[batch_i], logq[batch_i], logp[batch_i]
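# Flow-based MCMC (independence Metropolis): each flow proposal x' is accepted
# with probability min(1, [p(x')/q(x')] / [p(x)/q(x)]); on rejection the previous
# configuration is repeated in the chain.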
def make_mcmc_ensemble(model, action, batch_size, N_samples):
    history = {
        'x' : [],
        'logq' : [],
        'logp' : [],
        'accepted' : []
    }
    # build Markov chain
    sample_gen = serial_sample_generator(model, action, batch_size, N_samples)
    for new_x, new_logq, new_logp in sample_gen:
        if len(history['logp']) == 0:
            # always accept first proposal, Markov chain must start somewhere
            accepted = True
        else:
            # Metropolis acceptance condition
            last_logp = history['logp'][-1]
            last_logq = history['logq'][-1]
            p_accept = torch.exp((new_logp - new_logq) - (last_logp - last_logq))
            p_accept = min(1, p_accept)
            draw = torch.rand(1) # ~ [0,1]
            if draw < p_accept:
                accepted = True
            else:
                accepted = False
                new_x = history['x'][-1]
                new_logp = last_logp
                new_logq = last_logq
        # Update Markov chain
        history['logp'].append(new_logp)
        history['logq'].append(new_logq)
        history['x'].append(new_x)
        history['accepted'].append(accepted)
    return history
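# Plain CNN used inside each coupling layer: stacked Conv2d blocks with circular
# (periodic) padding so the network respects the lattice's periodic boundary
# conditions, LeakyReLU activations between hidden layers, and an optional
# final Tanh.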
def make_conv_net(*, hidden_sizes, kernel_size, in_channels, out_channels, use_final_tanh):
    sizes = [in_channels] + hidden_sizes + [out_channels]
    assert packaging.version.parse(torch.__version__) >= packaging.version.parse('1.5.0')
    assert kernel_size % 2 == 1, 'kernel size must be odd for PyTorch >= 1.5.0'
    padding_size = (kernel_size // 2)
    net = []
    for i in range(len(sizes) - 1):
        net.append(torch.nn.Conv2d(
            sizes[i], sizes[i+1], kernel_size, padding=padding_size,
            stride=1, padding_mode='circular'))
        if i != len(sizes) - 2:
            net.append(torch.nn.LeakyReLU())
        else:
            if use_final_tanh:
                net.append(torch.nn.Tanh())
    return torch.nn.Sequential(*net)
def set_weights(m):
    if hasattr(m, 'weight') and m.weight is not None:
        torch.nn.init.normal_(m.weight, mean=1, std=2)
    if hasattr(m, 'bias') and m.bias is not None:
        m.bias.data.fill_(-1)
def calc_dkl(logp, logq):
    return (logq - logp).mean()  # reverse KL, assuming samples from q
def apply_flow_to_prior(prior, coupling_layers, *, batch_size):
    x = prior.sample_n(batch_size)
    logq = prior.log_prob(x)
    for layer in coupling_layers:
        x, logJ = layer.forward(x)
        logq = logq - logJ
    return x, logq
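# One optimization step of the reverse-KL (self-training) objective:
# sample x ~ q via the flow, evaluate logp = -S(x), minimize
# DKL(q||p) ~= mean(logq - logp), and record the metrics.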
def train_step(model, action, loss_fn, optimizer, metrics, batch_size):
    layers, prior = model['layers'], model['prior']
    optimizer.zero_grad()
    x, logq = apply_flow_to_prior(prior, layers, batch_size=batch_size)
    logp = -action(x)
    loss = calc_dkl(logp, logq)
    loss.backward()
    optimizer.step()
    metrics['loss'].append(grab(loss))
    metrics['logp'].append(grab(logp))
    metrics['logq'].append(grab(logq))
    metrics['ess'].append(grab( compute_ess(logp, logq) ))
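# Plaquette angle P_{mu,nu}(x) = theta_mu(x) + theta_nu(x+mu) - theta_mu(x+nu)
# - theta_nu(x), computed for a batch of configurations with shape
# (batch, Nd, *lattice).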
def compute_u1_plaq(links, mu, nu):
    """Compute U(1) plaqs in the (mu,nu) plane given `links` = arg(U)"""
    return (links[:,mu] + torch.roll(links[:,nu], -1, mu+1)
            - torch.roll(links[:,mu], -1, nu+1) - links[:,nu])
class U1GaugeAction:
    def __init__(self, beta):
        self.beta = beta
    def __call__(self, cfgs):
        Nd = cfgs.shape[1]
        action_density = 0
        for mu in range(Nd):
            for nu in range(mu+1,Nd):
                action_density = action_density + torch.cos(
                    compute_u1_plaq(cfgs, mu, nu))
        return -self.beta * torch.sum(action_density, dim=tuple(range(1,Nd+1)))
def gauge_transform(links, alpha):
    for mu in range(len(links.shape[2:])):
        links[:,mu] = alpha + links[:,mu] - torch.roll(alpha, -1, mu+1)
    return links
def random_gauge_transform(x):
    Nconf, VolShape = x.shape[0], x.shape[2:]
    return gauge_transform(x, 2*np.pi*torch.rand((Nconf,) + VolShape))
def topo_charge(x):
    P01 = torch_wrap(compute_u1_plaq(x, mu=0, nu=1))
    axes = tuple(range(1, len(P01.shape)))
    return torch.sum(P01, dim=axes) / (2*np.pi)
class MultivariateUniform(torch.nn.Module):
    """Uniformly draw samples from [a,b]"""
    def __init__(self, a, b):
        super().__init__()
        self.dist = torch.distributions.uniform.Uniform(a, b)
    def log_prob(self, x):
        axes = range(1, len(x.shape))
        return torch.sum(self.dist.log_prob(x), dim=tuple(axes))
    def sample_n(self, batch_size):
        return self.dist.sample((batch_size,))
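# Gauge-equivariant coupling layer: the inner `plaq_coupling` transforms
# plaquette angles conditioned on frozen plaquettes, and the resulting change
# delta_plaq is pushed onto a striped set of active links, so the map commutes
# with U(1) gauge transformations.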
class GaugeEquivCouplingLayer(torch.nn.Module):
    """U(1) gauge equiv coupling layer defined by `plaq_coupling` acting on plaquettes."""
    def __init__(self, *, lattice_shape, mask_mu, mask_off, plaq_coupling):
        super().__init__()
        link_mask_shape = (len(lattice_shape),) + lattice_shape
        self.active_mask = make_2d_link_active_stripes(link_mask_shape, mask_mu, mask_off)
        self.plaq_coupling = plaq_coupling
    def forward(self, x):
        plaq = compute_u1_plaq(x, mu=0, nu=1)
        new_plaq, logJ = self.plaq_coupling(plaq)
        delta_plaq = new_plaq - plaq
        delta_links = torch.stack((delta_plaq, -delta_plaq), dim=1) # signs for U vs Udagger
        fx = self.active_mask * torch_mod(delta_links + x) + (1-self.active_mask) * x
        return fx, logJ
    def reverse(self, fx):
        new_plaq = compute_u1_plaq(fx, mu=0, nu=1)
        plaq, logJ = self.plaq_coupling.reverse(new_plaq)
        delta_plaq = plaq - new_plaq
        delta_links = torch.stack((delta_plaq, -delta_plaq), dim=1) # signs for U vs Udagger
        x = self.active_mask * torch_mod(delta_links + fx) + (1-self.active_mask) * fx
        return x, logJ
def make_2d_link_active_stripes(shape, mu, off):
    """
    Stripes mask looks like in the `mu` channel (mu-oriented links)::
      1 0 0 0 1 0 0 0 1 0 0
      1 0 0 0 1 0 0 0 1 0 0
      1 0 0 0 1 0 0 0 1 0 0
      1 0 0 0 1 0 0 0 1 0 0
    where vertical is the `mu` direction, and the pattern is offset in the nu
    direction by `off` (mod 4). The other channel is identically 0.
    """
    assert len(shape) == 2+1, 'need to pass shape suitable for 2D gauge theory'
    assert shape[0] == len(shape[1:]), 'first dim of shape must be Nd'
    assert mu in (0,1), 'mu must be 0 or 1'
    mask = np.zeros(shape).astype(np.uint8)
    if mu == 0:
        mask[mu,:,0::4] = 1
    elif mu == 1:
        mask[mu,0::4] = 1
    nu = 1-mu
    mask = np.roll(mask, off, axis=nu+1)
    return torch.from_numpy(mask.astype(float_dtype)).to(torch_device)
def make_single_stripes(shape, mu, off):
    """
      1 0 0 0 1 0 0 0 1 0 0
      1 0 0 0 1 0 0 0 1 0 0
      1 0 0 0 1 0 0 0 1 0 0
      1 0 0 0 1 0 0 0 1 0 0
    where vertical is the `mu` direction. Vector of 1 is repeated every 4.
    The pattern is offset in perpendicular to the mu direction by `off` (mod 4).
    """
    assert len(shape) == 2, 'need to pass 2D shape'
    assert mu in (0,1), 'mu must be 0 or 1'
    mask = np.zeros(shape).astype(np.uint8)
    if mu == 0:
        mask[:,0::4] = 1
    elif mu == 1:
        mask[0::4] = 1
    mask = np.roll(mask, off, axis=1-mu)
    return torch.from_numpy(mask).to(torch_device)
def make_double_stripes(shape, mu, off):
    """
    Double stripes mask looks like::
      1 1 0 0 1 1 0 0
      1 1 0 0 1 1 0 0
      1 1 0 0 1 1 0 0
      1 1 0 0 1 1 0 0
    where vertical is the `mu` direction. The pattern is offset in perpendicular
    to the mu direction by `off` (mod 4).
    """
    assert len(shape) == 2, 'need to pass 2D shape'
    assert mu in (0,1), 'mu must be 0 or 1'
    mask = np.zeros(shape).astype(np.uint8)
    if mu == 0:
        mask[:,0::4] = 1
        mask[:,1::4] = 1
    elif mu == 1:
        mask[0::4] = 1
        mask[1::4] = 1
    mask = np.roll(mask, off, axis=1-mu)
    return torch.from_numpy(mask).to(torch_device)
def make_plaq_masks(mask_shape, mask_mu, mask_off):
    mask = {}
    mask['frozen'] = make_double_stripes(mask_shape, mask_mu, mask_off+1)
    mask['active'] = make_single_stripes(mask_shape, mask_mu, mask_off)
    mask['passive'] = 1 - mask['frozen'] - mask['active']
    return mask
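# Non-compact projection (NCP) transform on an angle x in [0, 2*pi):
# tan_transform(x, s) = 2*atan(exp(s)*tan(x/2)) mod 2*pi, with its log-Jacobian,
# plus an equal-weight mixture over the s channels.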
def tan_transform(x, s):
    return torch_mod(2*torch.atan(torch.exp(s)*torch.tan(x/2)))
def tan_transform_logJ(x, s):
    return -torch.log(torch.exp(-s)*torch.cos(x/2)**2 + torch.exp(s)*torch.sin(x/2)**2)
def mixture_tan_transform(x, s):
    assert len(x.shape) == len(s.shape), \
        f'Dimension mismatch between x and s {x.shape} vs {s.shape}'
    return torch.mean(tan_transform(x, s), dim=1, keepdim=True)
def mixture_tan_transform_logJ(x, s):
    assert len(x.shape) == len(s.shape), \
        f'Dimension mismatch between x and s {x.shape} vs {s.shape}'
    return torch.logsumexp(tan_transform_logJ(x, s), dim=1) - np.log(s.shape[1])
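# The mixture transform has no closed-form inverse, so the reverse pass inverts
# it numerically by bisection on [a, b] to tolerance `tol`.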
def invert_transform_bisect(y, *, f, tol, max_iter, a=0, b=2*np.pi):
    min_x = a*torch.ones_like(y)
    max_x = b*torch.ones_like(y)
    min_val = f(min_x)
    max_val = f(max_x)
    with torch.no_grad():
        for i in range(max_iter):
            mid_x = (min_x + max_x) / 2
            mid_val = f(mid_x)
            greater_mask = (y > mid_val).int()
            greater_mask = greater_mask.float()
            err = torch.max(torch.abs(y - mid_val))
            if err < tol: return mid_x
            if torch.all((mid_x == min_x) + (mid_x == max_x)):
                print('WARNING: Reached floating point precision before tolerance '
                      f'(iter {i}, err {err})')
                return mid_x
            min_x = greater_mask*mid_x + (1-greater_mask)*min_x
            min_val = greater_mask*mid_val + (1-greater_mask)*min_val
            max_x = (1-greater_mask)*mid_x + greater_mask*max_x
            max_val = (1-greater_mask)*mid_val + greater_mask*max_val
        print(f'WARNING: Did not converge to tol {tol} in {max_iter} iters! Error was {err}')
        return mid_x
def stack_cos_sin(x):
    return torch.stack((torch.cos(x), torch.sin(x)), dim=1)
class NCPPlaqCouplingLayer(torch.nn.Module):
    def __init__(self, net, *, mask_shape, mask_mu, mask_off,
                 inv_prec=1e-6, inv_max_iter=1000):
        super().__init__()
        assert len(mask_shape) == 2, (
            f'NCPPlaqCouplingLayer is implemented only in 2D, '
            f'mask shape {mask_shape} is invalid')
        self.mask = make_plaq_masks(mask_shape, mask_mu, mask_off)
        self.net = net
        self.inv_prec = inv_prec
        self.inv_max_iter = inv_max_iter
    def forward(self, x):
        x2 = self.mask['frozen'] * x
        net_out = self.net(stack_cos_sin(x2))
        assert net_out.shape[1] >= 2, 'CNN must output n_mix (s_i) + 1 (t) channels'
        s, t = net_out[:,:-1], net_out[:,-1]
        x1 = self.mask['active'] * x
        x1 = x1.unsqueeze(1)
        local_logJ = self.mask['active'] * mixture_tan_transform_logJ(x1, s)
        axes = tuple(range(1, len(local_logJ.shape)))
        logJ = torch.sum(local_logJ, dim=axes)
        fx1 = self.mask['active'] * mixture_tan_transform(x1, s).squeeze(1)
        fx = (
            self.mask['active'] * torch_mod(fx1 + t) +
            self.mask['passive'] * x +
            self.mask['frozen'] * x)
        return fx, logJ
    def reverse(self, fx):
        fx2 = self.mask['frozen'] * fx
        net_out = self.net(stack_cos_sin(fx2))
        assert net_out.shape[1] >= 2, 'CNN must output n_mix (s_i) + 1 (t) channels'
        s, t = net_out[:,:-1], net_out[:,-1]
        x1 = torch_mod(self.mask['active'] * (fx - t).unsqueeze(1))
        transform = lambda x: self.mask['active'] * mixture_tan_transform(x, s)
        x1 = invert_transform_bisect(
            x1, f=transform, tol=self.inv_prec, max_iter=self.inv_max_iter)
        local_logJ = self.mask['active'] * mixture_tan_transform_logJ(x1, s)
        axes = tuple(range(1, len(local_logJ.shape)))
        logJ = -torch.sum(local_logJ, dim=axes)
        x1 = x1.squeeze(1)
        x = (
            self.mask['active'] * x1 +
            self.mask['passive'] * fx +
            self.mask['frozen'] * fx2)
        return x, logJ
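# Build the full flow: n_layers gauge-equivariant coupling layers whose mask
# orientation mu and offset cycle so that every link is eventually updated.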
def make_u1_equiv_layers(*, n_layers, n_mixture_comps, lattice_shape, hidden_sizes, kernel_size):
    layers = []
    for i in range(n_layers):
        # periodically loop through all arrangements of maskings
        mu = i % 2
        off = (i//2) % 4
        in_channels = 2  # x -> (cos(x), sin(x))
        out_channels = n_mixture_comps + 1  # for mixture s and t, respectively
        net = make_conv_net(in_channels=in_channels, out_channels=out_channels,
                            hidden_sizes=hidden_sizes, kernel_size=kernel_size,
                            use_final_tanh=False)
        plaq_coupling = NCPPlaqCouplingLayer(
            net, mask_shape=lattice_shape, mask_mu=mu, mask_off=off)
        link_coupling = GaugeEquivCouplingLayer(
            lattice_shape=lattice_shape, mask_mu=mu, mask_off=off,
            plaq_coupling=plaq_coupling)
        layers.append(link_coupling)
    return torch.nn.ModuleList(layers)
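# Training driver packaged from the notebook: fixed architecture (16 coupling
# layers, 2 mixture components, [8,8] hidden channels, 3x3 kernels) trained with
# Adam on the reverse-KL loss for N_era * N_epoch steps.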
def flow_train(param): # packaged from original ipynb by Xiao-Yong Jin
    # Theory
    lattice_shape = param.lat
    link_shape = (2,*param.lat)
    beta = param.beta
    u1_action = U1GaugeAction(beta)
    # Model
    prior = MultivariateUniform(torch.zeros(link_shape), 2*np.pi*torch.ones(link_shape))
    n_layers = 16
    n_s_nets = 2
    hidden_sizes = [8,8]
    kernel_size = 3
    layers = make_u1_equiv_layers(lattice_shape=lattice_shape, n_layers=n_layers, n_mixture_comps=n_s_nets,
                                  hidden_sizes=hidden_sizes, kernel_size=kernel_size)
    set_weights(layers)
    model = {'layers': layers, 'prior': prior}
    # Training
    base_lr = .001
    optimizer = torch.optim.Adam(model['layers'].parameters(), lr=base_lr)
    N_era = 10
    N_epoch = 100
    batch_size = 64
    print_freq = N_epoch # epochs
    plot_freq = 1 # epochs
    history = {
        'loss' : [],
        'logp' : [],
        'logq' : [],
        'ess' : []
    }
    for era in range(N_era):
        for epoch in range(N_epoch):
            train_step(model, u1_action, calc_dkl, optimizer, history, batch_size)
            if epoch % print_freq == 0:
                print_metrics(history, print_freq, era, epoch)
    return model,u1_action
def flow_eval(model, u1_action): # packaged from original ipynb by Xiao-Yong Jin
    ensemble_size = 8192
    u1_ens = make_mcmc_ensemble(model, u1_action, 64, ensemble_size)
    print("Accept rate:", np.mean(u1_ens['accepted']))
    Q = grab(topo_charge(torch.stack(u1_ens['x'], axis=0)))
    X_mean, X_err = bootstrap(Q**2, Nboot=100, binsize=16)
    print(f'Topological susceptibility = {X_mean:.2f} +/- {X_err:.2f}')
    print(f'... vs HMC estimate = 1.23 +/- 0.02')
# ------ END CODE FROM https://arxiv.org/abs/2101.08176
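# Driver: run HMC on an 8x8 lattice at beta = 2.0, then train a flow at the same
# parameters, evaluate it with flow-based MCMC, and finally push the last HMC
# configuration through the flow (reverse, then forward) as a check of the
# action, log-Jacobian, and force through the flow.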
if __name__ == '__main__':
    param = Param(
        beta = 2.0,
        lat = (8, 8),
        tau = 2, # 0.3
        nstep = 8, # 3
        ntraj = 256, # 2**16 # 2**10 # 2**15
        nprint = 8,
        seed = 1331)
    torch.manual_seed(param.seed)
    torch.set_num_threads(param.nth)
    torch.set_num_interop_threads(param.nth_interop)
    os.environ["OMP_NUM_THREADS"] = str(param.nth)
    os.environ["KMP_BLOCKTIME"] = "0"
    os.environ["KMP_SETTINGS"] = "1"
    os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0"
    torch.set_default_tensor_type(torch.DoubleTensor)
    def run(param):
        with open(param.uniquestr(), "w") as O:
            params = param.summary()
            O.write(params)
            put(params)
            field = param.initializer() # mu, x, t
            plaq, topo = (action(param, field) / (-param.beta*param.volume), topocharge(field))
            status = f"Initial configuration: plaq: {plaq} topo: {topo}\n"
            O.write(status)
            put(status)
            ts = []
            for n in range(param.nrun):
                t = -timer()
                for i in range(param.ntraj):
                    dH, exp_mdH, acc, field = hmc(param, field)
                    plaq = action(param, field) / (-param.beta*param.volume)
                    topo = topocharge(field)
                    ifacc = "ACCEPT" if acc else "REJECT"
                    status = f"Traj: {n*param.ntraj+i+1:4} {ifacc}: dH: {dH:< 12.8} exp(-dH): {exp_mdH:< 12.8} plaq: {plaq:< 12.8} topo: {topo:< 3.3}\n"
                    O.write(status)
                    if (i+1) % (param.ntraj//param.nprint) == 0:
                        put(status)
                t += timer()
                ts.append(t)
            print("Run times: ", ts)
            print("Per trajectory: ", [t/param.ntraj for t in ts])
            return field
    field = run(param)
    field = torch.reshape(field, (1,)+field.shape)
    flow_model, flow_act = flow_train(param)
    flow_eval(flow_model, flow_act)
    flow = flow_model['layers']
    flow.eval()
    print(f'plaq(field) {action(param, field[0]) / (-param.beta*param.volume)}')
    field.requires_grad_(True)
    x = field
    logJ = 0.0
    for layer in reversed(flow):
        x, lJ = layer.reverse(x)
        logJ += lJ
    s = action(param, x[0]) - logJ
    s.backward()
    f = field.grad
    field.requires_grad_(False)
    print(f'plaq(x) {action(param, x[0]) / (-param.beta*param.volume)} logJ {logJ} force.norm {torch.linalg.norm(f)}')
    y = x
    logJy = 0.0
    for layer in flow:
        y, lJ = layer.forward(y)
        logJy += lJ
    print(f'plaq(y) {action(param, y[0]) / (-param.beta*param.volume)} logJy {logJy}')