update_sig[source]

update_sig(d)

Update the signature of `f` from dict `d`
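
In the library this decorator appears to keep each config class's signature in sync with the defaults of the model it mirrors. A minimal sketch of a decorator with this behaviour, assuming it simply rewrites the wrapped callable's `__signature__` from the keys and defaults in `d` (illustrative, not necessarily the library's exact implementation):

import inspect

def update_sig(d):
    "Update the signature of `f` from dict `d`"
    def _inner(f):
        params = [inspect.Parameter(k, inspect.Parameter.KEYWORD_ONLY, default=v)
                  for k, v in d.items()]
        f.__signature__ = inspect.Signature(params)
        return f
    return _inner

@update_sig({'n_hashes': 4, 'seed': 123})
def make_config(**kwargs): return kwargs

inspect.signature(make_config)  # -> (*, n_hashes=4, seed=123)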

class ConfigBase[source]

ConfigBase(verbose=False, warn=True, **kwargs)

Base class for experiment configs
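
Subclasses pair a set of default hyperparameters with a target model: keyword arguments passed at construction override matching defaults, unknown keys are skipped with a warning, and the stored values are reachable as attributes, via item access, and through the internal `_d` dict (all of this is visible in the `SyntheticConfig` example below). A compact behavioural sketch, using hypothetical `_model` and `_defaults` attributes rather than the library's real internals:

# Illustrative sketch of the behaviour ConfigBase subclasses exhibit below;
# not the library's actual implementation.
class ToyConfig:
    _model = 'LSHLM'                          # hypothetical: model name used in messages
    _defaults = dict(n_hashes=4, seed=123)    # hypothetical: per-subclass default parameters

    def __init__(self, verbose=False, warn=True, **kwargs):
        self._d = dict(self._defaults)
        for k, v in kwargs.items():
            if k not in self._d:
                if warn: print(f'Parameter `{k}` is not accepted by {self._model}. Skipped')
                continue
            if verbose: print(f'Setting `{k}` = {v}')
            self._d[k] = v

    def __getitem__(self, k): return self._d[k]        # cfg['n_hashes']
    def __getattr__(self, k):                          # cfg.n_hashes
        try: return self.__dict__['_d'][k]
        except KeyError: raise AttributeError(k)
    def __setattr__(self, k, v):                       # cfg.n_hashes = 4 updates `_d`
        if k == '_d': object.__setattr__(self, k, v)
        elif k in self._d: self._d[k] = v
        else: raise AttributeError(f'{self._model} does not accept `{k}` argument')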

class SyntheticConfig[source]

SyntheticConfig(vocab_sz=128, d_model=256, n_layers=1, n_heads=4, d_ff=256, attn_dropout=0.0, ff_dropout=0.0, emb_dropout=0.0, tie_weights=True, causal=True, pos_enc='absolute', max_seq_len=1024, axial_shape=None, axial_emb_dims=None, pad_idx=None, prenorm=False, attn_bias=False, bucket_size=64, use_lsh=True, n_hashes=4, seed=123) :: ConfigBase

Config for the synthetic task experiment. See https://arampacha.github.io/reformer_fastai/experiment.synthetic-task.html for details

synt_config = SyntheticConfig(n_hashes=8, seed=1, foo=1, verbose=True)
synt_config
Setting `n_hashes` = 8
Setting `seed` = 1
Parameter `foo` is not accepted by LSHLM. Skipped
LSHLM config 
--------------------
vocab_sz        128
d_model         256
n_layers        1
n_heads         4
d_ff            256
attn_dropout    0.0
ff_dropout      0.0
emb_dropout     0.0
tie_weights     True
causal          True
pos_enc         absolute
max_seq_len     1024
axial_shape     None
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
bucket_size     64
use_lsh         True
n_hashes        8
seed            1
synt_config.d_model
256
synt_config.n_hashes = 4
assert synt_config._d['n_hashes'] == 4
try: synt_config.foo = 1
except Exception as e: print(e)
LSHLM does not accept `foo` argument
synt_config['n_layers']
1
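
Because the config keeps only parameters accepted by the target model, its `_d` dict can be unpacked straight into the model constructor. A minimal sketch; the import path for `LSHLM` is an assumption and may differ in your install:

from reformer_fastai.reformer import LSHLM   # assumed import path, adjust as needed

model = LSHLM(**synt_config._d)               # every key kept in `_d` is a valid LSHLM argument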

class TransformerLMConfigEnwik8[source]

TransformerLMConfigEnwik8(vocab_sz=256, d_model=1024, n_layers=3, n_heads=8, d_ff=4096, attn_dropout=0.1, ff_dropout=0.1, emb_dropout=0.1, tie_weights=True, causal=True, pos_enc='axial', max_seq_len=2048, axial_shape=(64, 32), axial_emb_dims=None, pad_idx=None, prenorm=False, attn_bias=False, shared_qk=False) :: ConfigBase

Config for the enwik8 baseline experiment. See https://arampacha.github.io/reformer_fastai/experiment.enwik8-baseline.html for details

TransformerLMConfigEnwik8()
TransformerLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     2048
axial_shape     (64, 32)
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
shared_qk       False

class ReversibleLMConfigEnwik8[source]

ReversibleLMConfigEnwik8(vocab_sz=256, d_model=1024, n_layers=3, n_heads=8, d_ff=4096, attn_dropout=0.1, ff_dropout=0.1, emb_dropout=0.1, tie_weights=True, causal=True, pos_enc='axial', max_seq_len=2048, axial_shape=(64, 32), axial_emb_dims=None, pad_idx=None, prenorm=True, attn_bias=False, rev_thres=0) :: ConfigBase

Config for the enwik8 reversible LM experiment. See https://arampacha.github.io/reformer_fastai/experiment.enwik8-reversible.html for details

ReversibleLMConfigEnwik8(rev_thres=128)
ReversibleLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     2048
axial_shape     (64, 32)
axial_emb_dims  None
pad_idx         None
prenorm         True
attn_bias       False
rev_thres       128

class NHashesConfig[source]

NHashesConfig(vocab_sz=256, d_model=1024, n_layers=3, n_heads=8, d_ff=4096, attn_dropout=0.1, ff_dropout=0.1, emb_dropout=0.1, tie_weights=True, causal=True, pos_enc='axial', max_seq_len=4096, axial_shape=None, axial_emb_dims=None, pad_idx=None, prenorm=False, attn_bias=False, bucket_size=64, use_lsh=True, n_hashes=2, seed=842) :: ConfigBase

Config for evaluating performance as a function of `n_hashes`. See https://arampacha.github.io/reformer_fastai/experiment.enwik8-n_hashes.html for details

cfg = NHashesConfig()
cfg
LSHLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     4096
axial_shape     None
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
bucket_size     64
use_lsh         True
n_hashes        2
seed            842
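
Since this config exists to compare runs that differ only in `n_hashes`, a sweep is just a comprehension over the values of interest (the values below are illustrative):

hash_configs = [NHashesConfig(n_hashes=n) for n in (1, 2, 4, 8)]
[c['n_hashes'] for c in hash_configs]
[1, 2, 4, 8]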

class NLayersConfig[source]

NLayersConfig(vocab_sz=256, d_model=1024, n_layers=3, n_heads=8, d_ff=4096, ff_chunks=64, attn_dropout=0.1, ff_dropout=0.1, emb_dropout=0.1, tie_weights=True, causal=True, pos_enc='axial', max_seq_len=16384, axial_shape=None, axial_emb_dims=None, pad_idx=None, prenorm=True, attn_bias=False, bucket_size=64, use_lsh=True, n_hashes=8, rev_thres=0, seed=842) :: ConfigBase

Config for evaluating performance as a function of `n_layers`. See https://arampacha.github.io/reformer_fastai/experiment.enwik8-n_layers.html for details

cfg = NLayersConfig()
cfg
ReformerLM config 
--------------------
vocab_sz        256
d_model         1024
n_layers        3
n_heads         8
d_ff            4096
ff_chunks       64
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
causal          True
pos_enc         axial
max_seq_len     16384
axial_shape     None
axial_emb_dims  None
pad_idx         None
prenorm         True
attn_bias       False
bucket_size     64
use_lsh         True
n_hashes        8
rev_thres       0
seed            842

class ReversibleTransformerConfigWMT[source]

ReversibleTransformerConfigWMT(enc_vocab_sz=33708, dec_vocab_sz=33708, n_enc_layers=6, n_dec_layers=6, n_heads=8, d_model=512, d_ff=2048, attn_dropout=0.1, ff_dropout=0.1, emb_dropout=0.1, tie_weights=True, shared_emb=True, pos_enc='fixed', max_seq_len=256, axial_shape=(64, 32), axial_emb_dims=None, pad_idx=None, prenorm=False, attn_bias=False, comb_attn=False) :: ConfigBase

Config for the WMT translation experiment (reversible Transformer). See https://arampacha.github.io/reformer_fastai/ for details

ReversibleTransformerConfigWMT(n_enc_layers=2, n_dec_layers=2)
ReversibleTransformer config 
--------------------
enc_vocab_sz    33708
dec_vocab_sz    33708
n_enc_layers    2
n_dec_layers    2
n_heads         8
d_model         512
d_ff            2048
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
shared_emb      True
pos_enc         fixed
max_seq_len     256
axial_shape     (64, 32)
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
comb_attn       False

class TransformerConfigWMT[source]

TransformerConfigWMT(enc_vocab_sz=33708, dec_vocab_sz=33708, n_enc_layers=6, n_dec_layers=6, n_heads=8, d_model=512, d_ff=2048, attn_dropout=0.1, ff_dropout=0.1, emb_dropout=0.1, tie_weights=True, shared_emb=True, pos_enc='fixed', max_seq_len=256, axial_shape=(64, 32), axial_emb_dims=None, pad_idx=None, prenorm=False, attn_bias=False, comb_attn=True) :: ConfigBase

Config for the WMT translation experiment (baseline Transformer). See https://arampacha.github.io/reformer_fastai/ for details

TransformerConfigWMT(n_enc_layers=1, n_dec_layers=1)
Transformer config 
--------------------
enc_vocab_sz    33708
dec_vocab_sz    33708
n_enc_layers    1
n_dec_layers    1
n_heads         8
d_model         512
d_ff            2048
attn_dropout    0.1
ff_dropout      0.1
emb_dropout     0.1
tie_weights     True
shared_emb      True
pos_enc         fixed
max_seq_len     256
axial_shape     (64, 32)
axial_emb_dims  None
pad_idx         None
prenorm         False
attn_bias       False
comb_attn       True
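
The two WMT configs take identical parameters and differ only in their defaults (notably `comb_attn`), so they can be compared directly via item access:

rev_cfg, base_cfg = ReversibleTransformerConfigWMT(), TransformerConfigWMT()
rev_cfg['comb_attn'], base_cfg['comb_attn']
(False, True)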