
Commit f863199

Merge branch 'master' into fix-#286
goerch authored Jul 2, 2023
2 parents 58ed3ad + b22fb03 commit f863199
Showing 35 changed files with 3,247 additions and 926 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -18,4 +18,6 @@ src/arm_neon.h
tests/arm_neon.h

zig-out/
-zig-cache/
+zig-cache/
+
+*.dot
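The new `*.dot` ignore rule covers Graphviz dumps of ggml computation graphs, as produced by `ggml_graph_dump_dot`. A minimal sketch of how such a file is generated, assuming the ggml API of this era (`ggml_build_forward` still returning the graph by value); this is illustrative, not code from the commit:

```cpp
#include "ggml/ggml.h"

int main(void) {
    // Small scratch context; 16 MiB is plenty for a toy graph.
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph gf = ggml_build_forward(c);
    // Writes Graphviz source; render with: dot -Tpng debug.dot -o debug.png
    ggml_graph_dump_dot(&gf, NULL, "debug.dot");

    ggml_free(ctx);
    return 0;
}
```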
69 changes: 49 additions & 20 deletions build.zig
@@ -1,25 +1,29 @@
const std = @import("std");

-// Zig Version: 0.11.0-dev.3798+a5e15eced
+// Zig Version: 0.11.0-dev.3886+0c1bfe271
// Zig Build Command: zig build
-// Zig Run Command:
-// zig build run_dolly-v2
-// zig build run_gpt-2
-// zig build run_gpt-j
-// zig build run_gpt-neox
-// zig build run_mnist
-// zig build run_mpt
-// zig build run_replit
-// zig build run_starcoder
-// zig build run_test-grad0
-// zig build run_test-mul-mat0
-// zig build run_test-mul-mat2
-// zig build run_test-opt
-// zig build run_test-vec1
-// zig build run_test0
-// zig build run_test1
-// zig build run_test2
-// zig build run_test3
+// Zig Run Command: zig build -h
+// zig build run_dolly-v2
+// zig build run_gpt-2
+// zig build run_gpt-j
+// zig build run_gpt-neox
+// zig build run_mnist
+// zig build run_mpt
+// zig build run_replit
+// zig build run_starcoder
+// zig build run_test-grad0
+// zig build run_test-mul-mat0
+// zig build run_test-mul-mat2
+// zig build run_test-opt
+// zig build run_test-vec1
+// zig build run_test0
+// zig build run_test1
+// zig build run_test2
+// zig build run_test3
+// zig build run_zig_test0
+// zig build run_zig_test1
+// zig build run_zig_test2
+// zig build run_zig_test3
pub fn build(b: *std.build.Builder) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
@@ -110,4 +114,29 @@ pub fn build(b: *std.build.Builder) void {
const run_step = b.step("run_" ++ name, "Run tests");
run_step.dependOn(&run_cmd.step);
}
-}
+
+    // zig_tests
+    const zig_tests = .{
+        "test0",
+        "test1",
+        "test2",
+        "test3",
+    };
+    inline for (zig_tests) |name| {
+        const exe = b.addExecutable(.{
+            .name = name,
+            .root_source_file = .{ .path = std.fmt.comptimePrint("tests/{s}.zig", .{name}) },
+            .target = target,
+            .optimize = optimize,
+        });
+        exe.addIncludePath("./include");
+        exe.addIncludePath("./include/ggml");
+        exe.linkLibrary(lib);
+        b.installArtifact(exe);
+        const run_cmd = b.addRunArtifact(exe);
+        run_cmd.step.dependOn(b.getInstallStep());
+        if (b.args) |args| run_cmd.addArgs(args);
+        const run_step = b.step("run_zig_" ++ name, "Run zig_tests");
+        run_step.dependOn(&run_cmd.step);
+    }
+}
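The new `zig_tests` block mirrors the existing C test loop: each entry gets its own `run_zig_<name>` step, so the Zig ports can be invoked as `zig build run_zig_test0` through `run_zig_test3`, matching the updated header comment. One version caveat: `addIncludePath` taking a plain string matches the 0.11.0-dev snapshot pinned above; later Zig releases changed this API to take a LazyPath, so these calls would need adjusting when bumping the toolchain.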
6 changes: 6 additions & 0 deletions examples/common.cpp
@@ -39,6 +39,10 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
params.top_p = std::stof(argv[++i]);
} else if (arg == "--temp") {
params.temp = std::stof(argv[++i]);
+} else if (arg == "--repeat-last-n") {
+    params.repeat_last_n = std::stof(argv[++i]);
+} else if (arg == "--repeat-penalty") {
+    params.repeat_penalty = std::stof(argv[++i]);
} else if (arg == "-b" || arg == "--batch_size") {
params.n_batch = std::stoi(argv[++i]);
} else if (arg == "-m" || arg == "--model") {
@@ -90,6 +94,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k);
fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p);
fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp);
+fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled)\n", params.repeat_last_n);
+fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty);
fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch);
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
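The two new flags feed the examples' sampling step. Note that `--repeat-last-n` is parsed with `std::stof` even though `repeat_last_n` is an `int32_t`; the implicit conversion compiles, but `std::stoi` would be the more natural fit. For orientation, here is a minimal sketch of how a repeat penalty is commonly applied to the logits before sampling (illustrative names, not the code in this repository):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper: dampen the logits of tokens that appeared in the
// last `repeat_last_n` positions. A penalty of 1.0f is a no-op, matching
// the "1.0 = disabled" wording in the usage text above.
void apply_repeat_penalty(std::vector<float> & logits,
                          const std::vector<int32_t> & last_tokens,
                          int32_t repeat_last_n,
                          float penalty) {
    const std::size_t n = last_tokens.size();
    const std::size_t start =
        n > (std::size_t) repeat_last_n ? n - repeat_last_n : 0;
    for (std::size_t i = start; i < n; ++i) {
        const int32_t tok = last_tokens[i];
        // Shrink positive logits and push negative ones further down,
        // making the token less likely to be sampled again.
        if (logits[tok] > 0.0f) {
            logits[tok] /= penalty;
        } else {
            logits[tok] *= penalty;
        }
    }
}
```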
2 changes: 2 additions & 0 deletions examples/common.h
@@ -23,6 +23,8 @@ struct gpt_params {
int32_t top_k = 40;
float top_p = 0.9f;
float temp = 0.9f;
+int32_t repeat_last_n = 64;
+float repeat_penalty = 1.00f;

int32_t n_batch = 8; // batch size for prompt processing

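With `repeat_penalty` defaulting to 1.00f, the documented "disabled" value, existing invocations of the examples sample exactly as before unless the new flags are passed explicitly.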
6 changes: 3 additions & 3 deletions examples/dolly-v2/main.cpp
@@ -100,7 +100,7 @@ bool dollyv2_model_load(const std::string & fname, dollyv2_model & model, gpt_vo
{
uint32_t magic;
fin.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
return false;
}
@@ -523,8 +523,8 @@ bool dollyv2_eval(
struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd/n_head, n_head, N, cur->nb[1]/n_head, cur->nb[1], 2*sizeof(float)*n_embd/n_head));

// using mode = 2 for GPT-NeoX mode
-Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_rot, 2);
-Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, n_rot, 2);
+Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_rot, 2, 0);
+Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, n_rot, 2, 0);

// store key and value to memory
{
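Two changes recur across the example programs from here on. First, the open-coded magic number is replaced by `GGML_FILE_MAGIC` from ggml.h; the value is unchanged, and is simply the ASCII bytes of "ggml". A quick check of that claim (illustrative, not from the commit):

```cpp
// GGML_FILE_MAGIC is defined in ggml.h as 0x67676d6c,
// i.e. the ASCII bytes 'g','g','m','l'.
static_assert(0x67676d6c == (('g' << 24) | ('g' << 16) | ('m' << 8) | 'l'),
              "magic spells \"ggml\"");
```

Second, `ggml_rope_inplace` gains a trailing integer argument, which every example sets to 0; this tracks the upstream signature change that added a context-length parameter (`n_ctx`) to the rope operators, so passing 0 presumably preserves the previous behavior.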
2 changes: 1 addition & 1 deletion examples/dolly-v2/quantize.cpp
@@ -47,7 +47,7 @@ bool dollyv2_model_quantize(const std::string & fname_inp, const std::string & f
{
uint32_t magic;
finp.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str());
return false;
}
2 changes: 1 addition & 1 deletion examples/gpt-2/main.cpp
@@ -85,7 +85,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
{
uint32_t magic;
fin.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
return false;
}
2 changes: 1 addition & 1 deletion examples/gpt-2/quantize.cpp
@@ -45,7 +45,7 @@ bool gpt2_model_quantize(const std::string & fname_inp, const std::string & fnam
{
uint32_t magic;
finp.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str());
return false;
}
6 changes: 3 additions & 3 deletions examples/gpt-j/main.cpp
@@ -85,7 +85,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
{
uint32_t magic;
fin.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
return false;
}
@@ -452,8 +452,8 @@ bool gptj_eval(

// self-attention
{
-struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
+struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);

// store key and value to memory
{
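For orientation: GPT-J passes mode 0, the original interleaved rotary scheme, whereas the dolly-v2 and gpt-neox examples above pass mode 2 for NeoX-style rotation (as their comments note). The extra trailing 0 is the same newly added `ggml_rope_inplace` argument described earlier.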
2 changes: 1 addition & 1 deletion examples/gpt-j/quantize.cpp
@@ -46,7 +46,7 @@ bool gptj_model_quantize(const std::string & fname_inp, const std::string & fnam
{
uint32_t magic;
finp.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str());
return false;
}
6 changes: 3 additions & 3 deletions examples/gpt-neox/main.cpp
@@ -90,7 +90,7 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt_
{
uint32_t magic;
fin.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
return false;
}
@@ -517,8 +517,8 @@ bool gpt_neox_eval(
struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd/n_head, n_head, N, cur->nb[1]/n_head, cur->nb[1], 2*sizeof(float)*n_embd/n_head));

// using mode = 2 for GPT-NeoX mode
-Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_rot, 2);
-Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, n_rot, 2);
+Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_rot, 2, 0);
+Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, n_rot, 2, 0);

// store key and value to memory
{
2 changes: 1 addition & 1 deletion examples/gpt-neox/quantize.cpp
@@ -47,7 +47,7 @@ bool gpt_neox_model_quantize(const std::string & fname_inp, const std::string &
{
uint32_t magic;
finp.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str());
return false;
}
2 changes: 1 addition & 1 deletion examples/mnist/main.cpp
@@ -48,7 +48,7 @@ bool mnist_model_load(const std::string & fname, mnist_model & model) {
{
uint32_t magic;
fin.read((char *) &magic, sizeof(magic));
-if (magic != 0x67676d6c) {
+if (magic != GGML_FILE_MAGIC) {
fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
return false;
}
27 changes: 27 additions & 0 deletions examples/mpt/README.md
@@ -0,0 +1,27 @@
+# MPT
+
+Ref: https://github.com/mosaicml/llm-foundry#mpt
+
+## Usage
+
+```bash
+# get the repo and build it
+git clone https://github.com/ggerganov/ggml
+cd ggml
+mkdir build && cd build
+cmake ..
+make -j
+
+# get the model from HuggingFace
+# be sure to have git-lfs installed
+git clone https://huggingface.co/mosaicml/mpt-30b
+
+# convert model to FP16
+python3 ../examples/mpt/convert-h5-to-ggml.py ./mpt-30b 1
+
+# run inference using FP16 precision
+./bin/mpt -m ./mpt-30b/ggml-model-f16.bin -p "I believe the meaning of life is" -t 8 -n 64
+
+# quantize the model to 5-bits using Q5_0 quantization
+./bin/mpt-quantize ./mpt-30b/ggml-model-f16.bin ./mpt-30b/ggml-model-q5_0.bin q5_0
+```
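A natural follow-on not spelled out in the README as committed: the quantized model should run through the same binary, e.g. `./bin/mpt -m ./mpt-30b/ggml-model-q5_0.bin -p "I believe the meaning of life is" -t 8 -n 64` (an extrapolation of the FP16 invocation above).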