Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- 130m-ckpt-26/config.json +30 -0
- 130m-ckpt-26/fabric_ckpt/.metadata +3 -0
- 130m-ckpt-26/fabric_ckpt/__0_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__10_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__11_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__12_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__13_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__14_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__15_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__1_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__2_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__3_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__4_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__5_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__6_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__7_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__8_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/__9_0.distcp +3 -0
- 130m-ckpt-26/fabric_ckpt/meta.pt +3 -0
- 130m-ckpt-26/generation_config.json +7 -0
- 130m-ckpt-26/pytorch_model.bin +3 -0
- 130m-ckpt-26/special_tokens_map.json +23 -0
- 130m-ckpt-26/tokenizer.json +0 -0
- 130m-ckpt-26/tokenizer_config.json +41 -0
.gitattributes
CHANGED
|
@@ -50,3 +50,20 @@ fabric_ckpt/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
|
| 50 |
fabric_ckpt/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 51 |
fabric_ckpt/__8_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 52 |
fabric_ckpt/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
fabric_ckpt/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 51 |
fabric_ckpt/__8_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 52 |
fabric_ckpt/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
130m-ckpt-26/fabric_ckpt/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
130m-ckpt-26/fabric_ckpt/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
130m-ckpt-26/fabric_ckpt/__10_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
130m-ckpt-26/fabric_ckpt/__11_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
130m-ckpt-26/fabric_ckpt/__12_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
130m-ckpt-26/fabric_ckpt/__13_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
130m-ckpt-26/fabric_ckpt/__14_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
130m-ckpt-26/fabric_ckpt/__15_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
130m-ckpt-26/fabric_ckpt/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
130m-ckpt-26/fabric_ckpt/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
130m-ckpt-26/fabric_ckpt/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
130m-ckpt-26/fabric_ckpt/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
130m-ckpt-26/fabric_ckpt/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
130m-ckpt-26/fabric_ckpt/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
130m-ckpt-26/fabric_ckpt/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
130m-ckpt-26/fabric_ckpt/__8_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
130m-ckpt-26/fabric_ckpt/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
130m-ckpt-26/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "llama-small",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LlamaForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"attention_bias": false,
|
| 7 |
+
"attention_dropout": 0.0,
|
| 8 |
+
"bos_token_id": 1,
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 2048,
|
| 14 |
+
"max_position_embeddings": 2048,
|
| 15 |
+
"max_sequence_length": 2048,
|
| 16 |
+
"model_type": "llama",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"num_key_value_heads": 12,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pretraining_tp": 1,
|
| 22 |
+
"rms_norm_eps": 1e-05,
|
| 23 |
+
"rope_scaling": null,
|
| 24 |
+
"rope_theta": 10000.0,
|
| 25 |
+
"tie_word_embeddings": false,
|
| 26 |
+
"torch_dtype": "float32",
|
| 27 |
+
"transformers_version": "4.40.0",
|
| 28 |
+
"use_cache": true,
|
| 29 |
+
"vocab_size": 32000
|
| 30 |
+
}
|
130m-ckpt-26/fabric_ckpt/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79e01d6b2ea99a6f5d2587b0ffbc5a611bc7c3260f053033652c7bdcc290483b
|
| 3 |
+
size 1345252
|
130m-ckpt-26/fabric_ckpt/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a5c446af4d2593869aea72cfb7be114145a6525a6b1c573eabb4da106efc99b
|
| 3 |
+
size 103842240
|
130m-ckpt-26/fabric_ckpt/__10_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b4d70d7f593e19adf11ae2c3e14957fa98e3b51a4a48b8ee26b2ae8bfcb7e5a
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__11_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddc3e6595a24bfd1f192bf6074fcb8fc8aabb8f568b97b672a40184a5083e241
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__12_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f407ab80b392f1c90e94a200f4f3b956c284b06720f12852e5ec71ef0d17d346
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__13_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:934721e37a5b0991fe9f267c113996e8f29bef3953d8267ce06b9080b99e76b6
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__14_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c63c8d65e8c25bd10874a49639dd11ef991594e9dece86678ea5f7ffe885c86
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__15_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fff6a224dd9775cf51ee4e722ac1479f4d62b099ae148969cbe734a4a008539
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0f7dce2490a475780a2b29e01b79b49c348435581b1af30782eb7186f170047
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:718e458487fb9d93fa6f093e128d51f15b1d5ee6becd8fb1cfd202b0d54a5c5b
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72fd7b0f7a71c7d7c285c8b1981b7f7bfa1231216059cdc9b9225c1b0d6aca67
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:610df5d32f04f782cc4532353065b70c3b03531e6cb3de0cbf15bc215e87f917
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9e59913014394378ea10370d0013b65b29d150d4f0eae1878746f4b9413f30e
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60e8357f8936c6ea3de05368b1e72240e08bcf8e34d73cd8396440ac64606a96
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:164833d81ef7a9611020065b76701bd5317bbf6220bb74b92e0e87964f9d3584
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__8_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b64d014fcf12e6cc948192a7e1f9ee9d8c5e2a4836b1492170ca321ca99f52f
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/__9_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c95f9de2bc3367552a387e9bf4d012ae7f2ddfcee43fd08f5055a4d70d41d78c
|
| 3 |
+
size 100972332
|
130m-ckpt-26/fabric_ckpt/meta.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34a444488a0a786449d22801002188401f1ed541f45110f106ad5fb30679b1a
|
| 3 |
+
size 852
|
130m-ckpt-26/generation_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 1,
|
| 4 |
+
"eos_token_id": 2,
|
| 5 |
+
"pad_token_id": 0,
|
| 6 |
+
"transformers_version": "4.40.0"
|
| 7 |
+
}
|
130m-ckpt-26/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14a0d2d6f3675c395139d7a8a8cadc7b24ab4d8589f908620036e7f99e72f04e
|
| 3 |
+
size 537163734
|
130m-ckpt-26/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"unk_token": {
|
| 17 |
+
"content": "<unk>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
}
|
| 23 |
+
}
|
130m-ckpt-26/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
130m-ckpt-26/tokenizer_config.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"0": {
|
| 6 |
+
"content": "<unk>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"1": {
|
| 14 |
+
"content": "<s>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"2": {
|
| 22 |
+
"content": "</s>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"bos_token": "<s>",
|
| 31 |
+
"clean_up_tokenization_spaces": false,
|
| 32 |
+
"eos_token": "</s>",
|
| 33 |
+
"legacy": false,
|
| 34 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 35 |
+
"pad_token": null,
|
| 36 |
+
"padding_side": "right",
|
| 37 |
+
"sp_model_kwargs": {},
|
| 38 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 39 |
+
"unk_token": "<unk>",
|
| 40 |
+
"use_default_system_prompt": false
|
| 41 |
+
}
|