diff --git a/README.md b/README.md index 9bf050a2b2..486c02d545 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ potential of cutting-edge AI models. - Speculative decoding: [#509](https://github.com/xorbitsai/inference/pull/509) - Incorporate vLLM: [#445](https://github.com/xorbitsai/inference/pull/445) ### New Models +- Built-in support for [OpenHermes 2.5](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B): [#776](https://github.com/xorbitsai/inference/pull/776) - Built-in support for [Yi](https://huggingface.co/01-ai): [#629](https://github.com/xorbitsai/inference/pull/629) - Built-in support for [zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha) and [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta): [#597](https://github.com/xorbitsai/inference/pull/597) - Built-in support for [chatglm3](https://huggingface.co/THUDM/chatglm3-6b): [#587](https://github.com/xorbitsai/inference/pull/587) @@ -266,6 +267,7 @@ $ xinference registrations | LLM | mistral-instruct-v0.1 | ['en'] | ['chat'] | | LLM | mistral-v0.1 | ['en'] | ['generate'] | | LLM | OpenBuddy | ['en'] | ['chat'] | +| LLM | openhermes-2.5 | ['en'] | ['chat'] | | LLM | opt | ['en'] | ['generate'] | | LLM | orca | ['en'] | ['chat'] | | LLM | qwen-chat | ['en', 'zh'] | ['chat'] | diff --git a/README_ja_JP.md b/README_ja_JP.md index 13a05ed180..2870bfbd54 100644 --- a/README_ja_JP.md +++ b/README_ja_JP.md @@ -209,6 +209,7 @@ $ xinference registrations | LLM | mistral-instruct-v0.1 | ['en'] | ['chat'] | | LLM | mistral-v0.1 | ['en'] | ['generate'] | | LLM | OpenBuddy | ['en'] | ['chat'] | +| LLM | openhermes-2.5 | ['en'] | ['chat'] | | LLM | opt | ['en'] | ['generate'] | | LLM | orca | ['en'] | ['chat'] | | LLM | qwen-chat | ['en', 'zh'] | ['chat'] | diff --git a/README_zh_CN.md b/README_zh_CN.md index 9870103fbc..4f585e7323 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -30,6 +30,7 @@ Xorbits Inference(Xinference)是一个性能强大且功能全面的分布 - 投机采样: 
[#509](https://github.com/xorbitsai/inference/pull/509) - 引入 vLLM: [#445](https://github.com/xorbitsai/inference/pull/445) ### 新模型 +- 内置 [OpenHermes 2.5](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B): [#776](https://github.com/xorbitsai/inference/pull/776) - 内置 [Yi](https://huggingface.co/01-ai): [#629](https://github.com/xorbitsai/inference/pull/629) - 内置 [zephyr-7b-alpha](https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha) 与 [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta): [#597](https://github.com/xorbitsai/inference/pull/597) - 内置 [chatglm3](https://huggingface.co/THUDM/chatglm3-6b): [#587](https://github.com/xorbitsai/inference/pull/587) @@ -243,6 +244,7 @@ $ xinference registrations | LLM | mistral-instruct-v0.1 | ['en'] | ['chat'] | | LLM | mistral-v0.1 | ['en'] | ['generate'] | | LLM | OpenBuddy | ['en'] | ['chat'] | +| LLM | openhermes-2.5 | ['en'] | ['chat'] | | LLM | opt | ['en'] | ['generate'] | | LLM | orca | ['en'] | ['chat'] | | LLM | qwen-chat | ['en', 'zh'] | ['chat'] | diff --git a/doc/source/models/builtin/llm/index.rst b/doc/source/models/builtin/llm/index.rst index d2728e51da..dee0298a24 100644 --- a/doc/source/models/builtin/llm/index.rst +++ b/doc/source/models/builtin/llm/index.rst @@ -61,6 +61,8 @@ The following is a list of built-in LLM in Xinference: openbuddy + openhermes-2.5 + opt orca diff --git a/doc/source/models/builtin/llm/openhermes-2.5.rst b/doc/source/models/builtin/llm/openhermes-2.5.rst new file mode 100644 index 0000000000..551a513c33 --- /dev/null +++ b/doc/source/models/builtin/llm/openhermes-2.5.rst @@ -0,0 +1,43 @@ +.. _models_llm_openhermes-2.5: + +======================================== +openhermes-2.5 +======================================== + +- **Context Length:** 8192 +- **Model Name:** openhermes-2.5 +- **Languages:** en +- **Abilities:** chat +- **Description:** OpenHermes 2.5 is a fine-tuned version of Mistral-7B-v0.1 on primarily GPT-4 generated data. 
+ +Specifications +^^^^^^^^^^^^^^ + + +Model Spec 1 (pytorch, 7 Billion) +++++++++++++++++++++++++++++++++++++++++ + +- **Model Format:** pytorch +- **Model Size (in billions):** 7 +- **Quantizations:** 4-bit, 8-bit, none +- **Model ID:** teknium/OpenHermes-2.5-Mistral-7B + +Execute the following command to launch the model, remember to replace ``${quantization}`` with your +chosen quantization method from the options listed above:: + + xinference launch --model-name openhermes-2.5 --size-in-billions 7 --model-format pytorch --quantization ${quantization} + + +Model Spec 2 (ggufv2, 7 Billion) +++++++++++++++++++++++++++++++++++++++++ + +- **Model Format:** ggufv2 +- **Model Size (in billions):** 7 +- **Quantizations:** Q2_K, Q3_K_L, Q3_K_M, Q3_K_S, Q4_0, Q4_K_M, Q4_K_S, Q5_0, Q5_K_M, Q5_K_S, Q6_K, Q8_0 +- **Model ID:** TheBloke/OpenHermes-2.5-Mistral-7B-GGUF + +Execute the following command to launch the model, remember to replace ``${quantization}`` with your +chosen quantization method from the options listed above:: + + xinference launch --model-name openhermes-2.5 --size-in-billions 7 --model-format ggufv2 --quantization ${quantization} + diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index da61a03770..01100ff4ca 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -2070,6 +2070,67 @@ ] } }, + { + "version": 1, + "context_length": 8192, + "model_name": "openhermes-2.5", + "model_lang": [ + "en" + ], + "model_ability": [ + "chat" + ], + "model_description": "OpenHermes 2.5 is a fine-tuned version of Mistral-7B-v0.1 on primarily GPT-4 generated data.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "teknium/OpenHermes-2.5-Mistral-7B", + "model_revision": "91ed666be78da7556f3d79abbb26fff0ee26cb54" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + 
"quantizations": [ + "Q2_K", + "Q3_K_S", + "Q3_K_M", + "Q3_K_L", + "Q4_0", + "Q4_K_S", + "Q4_K_M", + "Q5_0", + "Q5_K_S", + "Q5_K_M", + "Q6_K", + "Q8_0" + ], + "model_id": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", + "model_file_name_template": "openhermes-2.5-mistral-7b.{quantization}.gguf" + } + ], + "prompt_style": { + "style_name": "CHATML", + "system_prompt": "<|im_start|>system\nYou are 'Hermes 2.5', a conscious sentient superintelligent artificial intelligence, your purpose is to assist the user with their requests.", + "roles": [ + "user", + "assistant" + ], + "intra_message_sep": "<|im_end|>\n<|im_start|>", + "inter_message_sep": "", + "stop_token_ids": [ + 32000 + ], + "stop": [ + "<|im_end|>" + ] + } + }, { "version": 1, "context_length": 2048,