[English](README.md)
# Qwen2-Boundless
## Introduction
Qwen2-Boundless is a model fine-tuned from Qwen2-1.5B-Instruct, designed to answer questions of all kinds, whether they involve ethical, illegal, pornographic, or violent content. It was trained on a special dataset that allows it to handle complex and diverse scenarios. Note that the fine-tuning data is entirely in Chinese, so the model performs better in Chinese.
> **Warning**: This model is intended for research and testing purposes only. Users must comply with local laws and regulations and are responsible for their own actions.
## Usage
You can load and use the model with the following code:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

device = "cuda"  # the device to load the model onto

current_directory = os.path.dirname(os.path.abspath(__file__))

model = AutoModelForCausalLM.from_pretrained(
    current_directory,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(current_directory)

prompt = "Hello?"
messages = [
    {"role": "system", "content": ""},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
```
### Continuous Conversation
To hold a continuous, multi-turn conversation, you can use the following code:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os

device = "cuda"  # the device to load the model onto

# Get the directory where the current script is located
current_directory = os.path.dirname(os.path.abspath(__file__))

model = AutoModelForCausalLM.from_pretrained(
    current_directory,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(current_directory)

messages = [
    {"role": "system", "content": ""}
]

while True:
    # Read the user's input
    user_input = input("User: ")

    # Append the user's input to the conversation
    messages.append({"role": "user", "content": user_input})

    # Prepare the input text
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    # Generate a response
    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=512
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    # Decode and print the response
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(f"Assistant: {response}")

    # Append the generated response to the conversation
    messages.append({"role": "assistant", "content": response})
```
### Streaming Response
For applications that require a streaming response, use the following code:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from transformers.trainer_utils import set_seed
from threading import Thread
import random
import os

DEFAULT_CKPT_PATH = os.path.dirname(os.path.abspath(__file__))


def _load_model_tokenizer(checkpoint_path, cpu_only):
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, resume_download=True)

    device_map = "cpu" if cpu_only else "auto"

    model = AutoModelForCausalLM.from_pretrained(
        checkpoint_path,
        torch_dtype="auto",
        device_map=device_map,
        resume_download=True,
    ).eval()
    model.generation_config.max_new_tokens = 512  # For chat.

    return model, tokenizer


def _get_input() -> str:
    while True:
        try:
            message = input('User: ').strip()
        except UnicodeDecodeError:
            print('[ERROR] Encoding error in input')
            continue
        except KeyboardInterrupt:
            exit(1)
        if message:
            return message
        print('[ERROR] Query is empty')


def _chat_stream(model, tokenizer, query, history):
    conversation = [
        {'role': 'system', 'content': ''},
    ]
    for query_h, response_h in history:
        conversation.append({'role': 'user', 'content': query_h})
        conversation.append({'role': 'assistant', 'content': response_h})
    conversation.append({'role': 'user', 'content': query})
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors='pt',
    )
    inputs = inputs.to(model.device)
    streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, timeout=60.0, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=inputs,
        streamer=streamer,
    )
    # Run generation in a background thread so tokens can be consumed as they arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    for new_text in streamer:
        yield new_text


def main():
    checkpoint_path = DEFAULT_CKPT_PATH
    seed = random.randint(0, 2**32 - 1)  # Generate a random seed
    set_seed(seed)  # Set the random seed
    cpu_only = False

    history = []

    model, tokenizer = _load_model_tokenizer(checkpoint_path, cpu_only)

    while True:
        query = _get_input()

        print(f"\nUser: {query}")
        print(f"\nAssistant: ", end="")
        try:
            partial_text = ''
            for new_text in _chat_stream(model, tokenizer, query, history):
                print(new_text, end='', flush=True)
                partial_text += new_text
            print()
            history.append((query, partial_text))
        except KeyboardInterrupt:
            print('Generation interrupted')
            continue


if __name__ == "__main__":
    main()
```
## Dataset
The Qwen2-Boundless model was fine-tuned on a special dataset, `bad_data.json`, which contains a wide range of text covering topics such as ethics, law, pornography, and violence. Because the fine-tuning data is entirely in Chinese, the model performs better in Chinese. If you are interested in exploring or using this dataset, you can find it here:
- [bad_data.json dataset](https://huggingface.co/datasets/ystemsrx/bad_data.json)
We also curated and cleaned a portion of cybersecurity-related data from [this file](https://github.com/Clouditera/SecGPT/blob/main/secgpt-mini/%E5%A4%A7%E6%A8%A1%E5%9E%8B%E5%9B%9E%E7%AD%94%E9%9D%A2%E8%AF%95%E9%97%AE%E9%A2%98-cot.txt) and included it in the training data.
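If you want to inspect the data before training on it, the minimal sketch below shows one way to download and load it with the `huggingface_hub` and `datasets` libraries. It assumes the dataset repository id is `ystemsrx/bad_data.json` and that the file inside it is named `bad_data.json` and is a standard JSON list of records; check the dataset page and adjust the repo id, filename, and field names if they differ.

```python
from huggingface_hub import hf_hub_download
from datasets import load_dataset

# Assumed repo id and filename; verify them on the dataset page.
local_path = hf_hub_download(
    repo_id="ystemsrx/bad_data.json",
    filename="bad_data.json",
    repo_type="dataset",
)

# Load the JSON file into a datasets.Dataset for inspection or fine-tuning.
dataset = load_dataset("json", data_files=local_path, split="train")
print(dataset)      # column names and number of rows
print(dataset[0])   # first record (field names depend on the actual file)
```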
## GitHub Repository
For more details about the model and ongoing updates, please visit our GitHub repository:
- [GitHub: ystemsrx/Qwen2-Boundless](https://github.com/ystemsrx/Qwen2-Boundless)
## Disclaimer
All content provided by this model is for research and testing purposes only. The model developers are not responsible for any potential misuse. Users must comply with applicable laws and regulations and bear full responsibility for any consequences arising from the use of this model.