Custom `Torch` utilities
with torch.no_grad():
    tst = nn.Linear(4, 5)
    tst.weight.data.uniform_(-1, 1)
    tst.bias.data.uniform_(-1, 1)
    tst = init_default(tst, func=lambda x: x.data.fill_(1.0))
    test_eq(tst.weight, torch.ones(5, 4))
    test_eq(tst.bias, torch.zeros(5))
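Reading the tests above, `init_default` appears to apply `func` to the module's weight and zero its bias when one exists. A minimal sketch of that behaviour, written here as a hypothetical helper rather than the library's actual code:

import torch.nn as nn

def init_default_sketch(m, func=nn.init.kaiming_normal_):
    # Hypothetical sketch, inferred from the tests above (not the library's code):
    # apply `func` to the weight and zero the bias, when they exist.
    if getattr(m, "weight", None) is not None:
        func(m.weight)
    if getattr(m, "bias", None) is not None:
        m.bias.data.fill_(0.0)
    return m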
with torch.no_grad():
    tst = nn.Linear(4, 5)
    tst.weight.data.uniform_(-1, 1)
    tst.bias.data.uniform_(-1, 1)
    cond_init(tst, func=lambda x: x.data.fill_(1.0))
    test_eq(tst.weight, torch.ones(5, 4))
    test_eq(tst.bias, torch.zeros(5))
tst = nn.BatchNorm2d(5)
init = [tst.weight.clone(), tst.bias.clone()]
cond_init(tst, func=lambda x: x.data.fill_(1.0))
test_eq(tst.weight, init[0])
test_eq(tst.bias, init[1])
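The `BatchNorm2d` weight and bias are left untouched, which suggests `cond_init` only initializes layers that are not normalization layers. A hedged sketch of that condition (a hypothetical helper, and an assumed list of norm types, not the library's implementation):

import torch.nn as nn

_norm_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.GroupNorm, nn.LayerNorm)

def cond_init_sketch(m, func):
    # Hypothetical sketch: leave normalization layers at their default init,
    # apply `func` to the weight and zero the bias everywhere else.
    if isinstance(m, _norm_types):
        return
    if getattr(m, "weight", None) is not None:
        func(m.weight)
    if getattr(m, "bias", None) is not None:
        m.bias.data.fill_(0.0)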
tst = nn.Sequential(nn.Linear(4, 5), nn.Sequential(nn.Linear(4, 5), nn.Linear(4, 5)))
apply_leaf(tst, partial(init_default, func=lambda x: x.data.fill_(1.0)))
with torch.no_grad():
    for l in [tst[0], *tst[1]]:
        test_eq(l.weight, torch.ones(5, 4))
    for l in [tst[0], *tst[1]]:
        test_eq(l.bias, torch.zeros(5))
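`apply_leaf` walks the module tree so the initialization reaches the layers nested inside the inner `nn.Sequential`. For comparison, PyTorch's built-in `nn.Module.apply` does the same kind of recursive traversal; the snippet below reproduces the effect with an inline fill function (a sketch, not the library's `apply_leaf`):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 5), nn.Sequential(nn.Linear(4, 5), nn.Linear(4, 5)))

def fill_ones_zero_bias(m):
    # Containers such as nn.Sequential have no weight, so they are skipped.
    if getattr(m, "weight", None) is not None:
        m.weight.data.fill_(1.0)
    if getattr(m, "bias", None) is not None:
        m.bias.data.fill_(0.0)

model.apply(fill_ones_zero_bias)  # recurses over every submodule, including nested ones
assert torch.equal(model[0].weight, torch.ones(5, 4))
assert torch.equal(model[1][1].bias, torch.zeros(5))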
tst = nn.Sequential(nn.Linear(4, 5), nn.Sequential(nn.Linear(4, 5), nn.BatchNorm1d(5)))
init = [tst[1][1].weight.clone(), tst[1][1].bias.clone()]
apply_init(tst, func=lambda x: x.data.fill_(1.0))
with torch.no_grad():
    for l in [tst[0], tst[1][0]]:
        test_eq(l.weight, torch.ones(5, 4))
    for l in [tst[0], tst[1][0]]:
        test_eq(l.bias, torch.zeros(5))
    test_eq(tst[1][1].weight, init[0])
    test_eq(tst[1][1].bias, init[1])
model = nn.Sequential(nn.Linear(4, 5), nn.BatchNorm1d(5), nn.Linear(5, 1))
We grab the first `BatchNorm` layer and store its running mean:
m = model[1].running_mean.clone()
After a forward pass in training mode, you can see that the running mean has changed:
i = torch.randn(32, 4)
o = model(i)
test_ne(m, model[1].running_mean.detach())
When we use the `set_bn_eval` function, the running statistics will not be changed during training:
model = nn.Sequential(nn.Linear(4, 5), nn.BatchNorm1d(5))
model.train()
m = model[1].running_mean.clone()
set_bn_eval(model)
i = torch.randn(32, 4)
o = model(i)
test_eq(m, model[1].running_mean.detach())
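One way to get this behaviour, shown here as a hypothetical sketch rather than the library's implementation, is to walk the module tree and switch only the batch-norm layers to eval mode, so their running statistics stop updating while the rest of the model keeps training:

import torch.nn as nn

def set_bn_eval_sketch(m: nn.Module) -> None:
    # Hypothetical sketch: only the layers that track running statistics
    # are put into eval mode; everything else stays in training mode.
    for module in m.modules():
        if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
            module.eval()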
with torch.no_grad():
    m = nn.Linear(4, 5)
    test_eq(trainable_params(m), [m.weight, m.bias])
    m.weight.requires_grad_(False)
    test_eq(trainable_params(m), [m.bias])
    test_eq(params(m), [m.weight, m.bias])
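The difference between the two helpers, as the tests show, is that `params` returns every parameter while `trainable_params` keeps only those that still require gradients. A one-line sketch of the latter (hypothetical, inferred from the tests):

def trainable_params_sketch(m):
    # Hypothetical sketch: drop parameters frozen with requires_grad_(False).
    return [p for p in m.parameters() if p.requires_grad]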
output = torch.randn(10, 10)
t0 = torch.nn.functional.one_hot(torch.arange(0, 10) % 3, num_classes=10)
t1 = torch.arange(0, 10) % 3
o0 = maybe_convert_to_onehot(t0, output)
o1 = maybe_convert_to_onehot(t1, output)
test_eq(o0.shape, output.shape)
test_eq(o1.shape, output.shape)
test_eq(t0, o0)
We can see that `maybe_convert_to_onehot` converted `t1` to a one-hot encoded tensor but did not change `t0`, because it was already in one-hot encoded form/shape.
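A hedged sketch of the conversion rule this behaviour implies (a hypothetical helper, not the library's code): targets with one dimension fewer than the model output are treated as class indices and one-hot encoded, anything else passes through unchanged.

import torch
import torch.nn.functional as F

def maybe_convert_to_onehot_sketch(target, output):
    # Hypothetical sketch: class indices get one-hot encoded to the
    # output's number of classes; already-encoded targets pass through.
    if target.ndim == output.ndim - 1:
        target = F.one_hot(target.long(), num_classes=output.shape[-1])
    return target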
model = nn.Sequential(nn.Linear(4, 5), nn.BatchNorm1d(5))
param_list = [{"params": [p for p in model.parameters()]}]
param_list_dl, lrs = build_discriminative_lrs(param_list, lr_stop=1e-03)
assert len(param_list_dl) == 1
assert lrs == [1e-03]
assert param_list_dl[0]["lr"] == lrs[0]
model = nn.Sequential(nn.Linear(4, 5), nn.Linear(5, 10))
p1 = [{"params": [p for p in model[0].parameters()]}]
p2 = [{"params": [p for p in model[1].parameters()]}]
param_list = p1 + p2
param_list_dl, lrs = build_discriminative_lrs(param_list, lr_stop=1e-03)
assert len(param_list_dl) == 2
assert len(lrs) == 2
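The resulting groups can be handed straight to a PyTorch optimizer, since each dict already carries its own `lr` entry; for example (using `Adam` purely for illustration):

import torch

opt = torch.optim.Adam(param_list_dl)  # per-group "lr" entries override Adam's default
print([g["lr"] for g in opt.param_groups])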