from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Hugging Face repo id of the quantized base model and the local path of the
# fine-tuned LoRA adapter produced by training.
base_model = "unsloth/DeepSeek-R1-Distill-1B"
adapter_path = "game_assistant_lora"

# Load the base model 4-bit quantized, letting accelerate place layers
# automatically across available devices (GPU/CPU).
# NOTE(review): `load_in_4bit=True` requires bitsandbytes and is deprecated in
# newer transformers in favor of `quantization_config=BitsAndBytesConfig(...)`
# — confirm against the installed transformers version.
model = AutoModelForCausalLM.from_pretrained(
base_model,
device_map="auto",
load_in_4bit=True
)
# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(model, adapter_path)
# Tokenizer comes from the base model; the adapter does not change vocab.
tokenizer = AutoTokenizer.from_pretrained(base_model)
def parse_command(command):
    """Turn a natural-language moderation command into the model's structured reply.

    Builds an instruction-style prompt ("指令: .../响应: "), generates up to 50
    new tokens, and returns only the text after the final "响应:" marker.

    Args:
        command: The user instruction, e.g. "封禁账号12345 2小时".

    Returns:
        The model's response text with surrounding whitespace stripped
        (expected to be a JSON-like action string).
    """
    prompt = f"指令: {command}\n响应: "
    # Use the model's own device rather than hard-coding "cuda": with
    # device_map="auto" the model may sit on CPU or be sharded, and a bare
    # .to("cuda") crashes on CPU-only machines.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the last
    # "响应:" marker and strip the leading space left by the prompt template.
    return decoded.split("响应:")[-1].strip()
# Smoke-test the fine-tuned model on a sample moderation command.
print(parse_command("封禁账号12345 2小时"))
# Output: {"action": "ban", "user_id": "12345", "duration": "2h"}
# NOTE(review): `!pip` is IPython/Jupyter shell magic — this line is only
# valid inside a notebook cell, not a plain .py script.
!pip install unsloth
from unsloth import FastLanguageModel
# Load the same base model in 4-bit via Unsloth's optimized loader with a
# 1024-token context window.
# NOTE(review): this rebinds `model`, discarding the PeftModel built above —
# confirm this cell is meant to run standalone (e.g. in the training notebook).
model, _ = FastLanguageModel.from_pretrained(
"unsloth/DeepSeek-R1-Distill-1B",
load_in_4bit=True,
max_seq_length=1024,
)
project/
├── adapters/
│ ├── adapter_config.json
│ └── adapter_model.bin # LoRA weights (only 8 MB)
├── train.ipynb # Kaggle training notebook
└── inference.py # deployment script