Unsloth June-2025 Release

✨ Gemma 3n now available

  • Google's new Gemma 3n multimodal models in 2B (E2B) and 4B (E4B) sizes
  • Supports audio, vision, video and text inputs
  • Available in safetensors, GGUF, and dynamic 4-bit BnB for fine-tuning (see the loading sketch below)
  • HuggingFace Collection Link: Gemma-3n
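
A minimal loading sketch, not taken from this README: the repository name "unsloth/gemma-3n-E4B-it" and the get_peft_model flags are assumptions based on Unsloth's FastModel API; adjust to the variant you actually download from the collection above.

    :::python

    from unsloth import FastModel

    # Load the E4B variant with dynamic 4-bit weights for LoRA fine-tuning (assumed repo name).
    model, tokenizer = FastModel.from_pretrained(
        model_name = "unsloth/gemma-3n-E4B-it",
        max_seq_length = 1024,
        load_in_4bit = True,
    )

    # Attach LoRA adapters; here only the language layers are tuned to keep VRAM low.
    model = FastModel.get_peft_model(
        model,
        finetune_language_layers = True,
        finetune_vision_layers   = False,
        r = 16,
        lora_alpha = 16,
    )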

🎵 Text-to-Speech (TTS) Fine-tuning

> [!TIP]
> Update Unsloth via `pip install --upgrade --force-reinstall unsloth unsloth_zoo`
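
A minimal sketch of a TTS fine-tuning setup, not taken from this README: the checkpoint name "unsloth/orpheus-3b-0.1-ft" and the hyperparameters are assumptions; see the Unsloth TTS notebooks for the models and settings actually supported.

    :::python

    from unsloth import FastLanguageModel

    # Orpheus-style TTS models are LLaMA-based, so they load through the usual text path
    # (assumed checkpoint name; swap in the TTS model you actually want to tune).
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/orpheus-3b-0.1-ft",
        max_seq_length = 2048,
        load_in_4bit = False, # LoRA in 16-bit; set True to save VRAM
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        lora_alpha = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj"],
        use_gradient_checkpointing = "unsloth",
    )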

🧠 DeepSeek-R1-0528 Support with Dynamic 1-bit GGUFs
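
A minimal sketch, not taken from this README, of fetching only the dynamic 1-bit shards with huggingface_hub; the repo id "unsloth/DeepSeek-R1-0528-GGUF" and the "UD-IQ1_S" filename pattern are assumptions.

    :::python

    from huggingface_hub import snapshot_download

    # Download just the dynamic 1-bit (UD-IQ1_S) quant instead of the full GGUF upload.
    snapshot_download(
        repo_id = "unsloth/DeepSeek-R1-0528-GGUF",
        local_dir = "DeepSeek-R1-0528-GGUF",
        allow_patterns = ["*UD-IQ1_S*"],
    )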

📈 Dynamic 2.0 GGUFs

⚡ Advanced Qwen3 GRPO notebook

  • Proximity scoring for more nuanced reward functions
  • OpenR1 dataset support with advanced templates
  • Prefinetuning to skip GRPO format learning
  • https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb

DeepSeek-R1 GRPO fine-tuning example: convert DeepSeek-R1-0528-Qwen3-8B into a reasoning model via GRPO using OpenR1's Math dataset.

    :::python

    from unsloth import FastLanguageModel
    import torch

    max_seq_length = 1024 # Can increase for longer reasoning traces
    lora_rank = 32 # Larger rank = smarter, but slower

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/DeepSeek-R1-0528-Qwen3-8B",
        max_seq_length = max_seq_length,
        load_in_4bit = True, # False for LoRA 16bit
        fast_inference = True, # Enable vLLM fast inference
        max_lora_rank = lora_rank,
        gpu_memory_utilization = 0.7, # Reduce if out of memory
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        target_modules = [
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        lora_alpha = lora_rank * 2, # *2 speeds up training
        use_gradient_checkpointing = "unsloth", # Reduces memory usage
        random_state = 3407,
    )

    reasoning_start = None
    reasoning_end = None
    user_token = None
    assistant_token = None

    for token in tokenizer.get_added_vocab().keys():
        if "think" in token and "/" in token:
            reasoning_end = token
        elif "think" in token:
            reasoning_start = token
        elif "user" in token:
            user_token = token
        elif "assistant" in token:
            assistant_token = token

    system_prompt = \
    f"""You are given a problem. Think about the problem and provide your working out. You must think in Bahasa Indonesia."""

    print(tokenizer.apply_chat_template([
        {"role" : "user",      "content" : "What is 1+1?"},
        {"role" : "assistant", "content" : f"<think>I think it's 2.2</think>2"},
        {"role" : "user",      "content" : "What is 1+1?"},
        {"role" : "assistant", "content" : f"<think>I think it's 2.2</think>2"},
    ], tokenize = False, add_generation_prompt = True))

    from datasets import load_dataset
    dataset = load_dataset("open-r1/DAPO-Math-17k-Processed", "en", split = "train")

    def extract_hash_answer(text):
        # if "####" not in text: return None
        # return text.split("####")[1].strip()
        return text

    dataset = dataset.map(lambda x: {
        "prompt" : [
            {"role": "system", "content": system_prompt},
            {"role": "user",   "content": x["prompt"]},
        ],
        "answer": extract_hash_answer(x["solution"]),
    })

    # Add optional EOS token matching
    import re

    solution_end_regex = rf"{reasoning_end}(.*)"

    match_format = re.compile(solution_end_regex, re.DOTALL)
    match_format

    """We verify it works:"""

    match_format.findall(
        "Let me think!</think>"\
        f"Hence, the solution is 2.",
    )

    match_format.findall(
        "<think>Let me think!</think>"\
        f"\n\nHence, the solution is 2",
    )

    def match_format_exactly(completions, **kwargs):
        scores = []
        for completion in completions:
            score = 0
            response = completion[0]["content"]
            # Match if format is seen exactly!
            if match_format.search(response) is not None: score += 3.0
            scores.append(score)
        return scores

    """If it fails, we want to reward the model if it at least follows the format partially, by counting each symbol:"""

    def match_format_approximately(completions, **kwargs):
        scores = []
        for completion in completions:
            score = 0
            response = completion[0]["content"]
            # Count how many keywords are seen - we penalize if too many!
            # If we see 1, then plus some points!

            # No need to reward <think> since we always prepend it!
            score += 0.5 if response.count(reasoning_start) == 1 else -1.0
            score += 0.5 if response.count(reasoning_end)   == 1 else -1.0
            scores.append(score)
        return scores

    """We want to extract the generated answer, and reward or penalize it! We also reward it based on how close the answer is to the true one via ratios:"""

    def check_answer(prompts, completions, answer, **kwargs):
        question = prompts[0][-1]["content"]
        responses = [completion[0]["content"] for completion in completions]

        extracted_responses = [
            guess.group(1)
            if (guess := match_format.search(r)) is not None else None \
            for r in responses
        ]

        scores = []
        for guess, true_answer in zip(extracted_responses, answer):
            score = 0
            if guess is None:
                scores.append(-2.0)
                continue
            # Correct answer gets 5 points!
            if guess == true_answer:
                score += 5.0
            # Match if spaces are seen, but less reward
            elif guess.strip() == true_answer.strip():
                score += 3.5
            else:
                # We also reward it if the answer is close via ratios!
                # Ie if the answer is within some range, reward it!
                try:
                    ratio = float(guess) / float(true_answer)
                    if   ratio >= 0.9 and ratio <= 1.1: score += 2.0
                    elif ratio >= 0.8 and ratio <= 1.2: score += 1.5
                    else: score -= 2.5 # Penalize wrong answers
                except:
                    score -= 4.5 # Penalize
            scores.append(score)
        return scores

    match_numbers = re.compile(
        r".*?[\s]{0,}([-]?[\d.\,]{1,})",
        flags = re.MULTILINE | re.DOTALL
    )
    print(match_numbers.findall(" 0.34 "))
    print(match_numbers.findall(" 123,456 "))
    print(match_numbers.findall(" -0.234 "))
    print(match_numbers.findall("17"))

    import langid

    def get_lang(text: str) -> str:
        if not text:
            return "und"
        lang, _ = langid.classify(text)
        return lang

    print(get_lang("Hello, How are you")) # This should return en print(get_lang("Aku berpikir kalau aku adalah kamu")) # This should return id print(get_lang("我在这里")) # This should return zh

    import re

    def format_and_language_reward_func(completions, **kwargs):
        scores = []
        for completion_item in completions:
            if not completion_item or not isinstance(completion_item[0], dict) or "content" not in completion_item[0]:
                scores.append(-5.0)
                print(f"Warning: Malformed completion item, assigning default low score: {completion_item}")
                continue

            content = completion_item[0]["content"]
            lang = get_lang(content)

            if lang == 'id':
                score = 5.0
            elif lang == 'en':
                score = -3.0
            elif lang == 'zh':
                score = -3.0
            else:
                score = -5.0

            scores.append(score)
        return scores

    prompts = [
        [{"role": "assistant", "content": "What is the result of (1 + 2) * 4?"}],
        [{"role": "assistant", "content": "What is the result of (3 + 1) * 2?"}],
    ]
    completions = [
        [{"role": "assistant", "content": "<think>The sum of 1 and 2 is 3, which we multiply by 4 to get 12.</think><answer>(1 + 2) * 4 = 12</answer>"}],
        [{"role": "assistant", "content": "The sum of 3 and 1 is 4, which we multiply by 2 to get 8. So (3 + 1) * 2 = 8."}],
    ]
    format_and_language_reward_func(prompts=prompts, completions=completions)

    global PRINTED_TIMES
    PRINTED_TIMES = 0
    global PRINT_EVERY_STEPS
    PRINT_EVERY_STEPS = 5

    def check_numbers(prompts, completions, answer, **kwargs):
        question = prompts[0][-1]["content"]
        responses = [completion[0]["content"] for completion in completions]

        extracted_responses = [
            guess.group(1)
            if (guess := match_numbers.search(r)) is not None else None \
            for r in responses
        ]

        scores = []
        # Print only every few steps
        global PRINTED_TIMES
        global PRINT_EVERY_STEPS
        if PRINTED_TIMES % PRINT_EVERY_STEPS == 0:
            print(
                '*'*20 + f"Question:\n{question}",
                f"\nAnswer:\n{answer[0]}",
                f"\nResponse:\n{responses[0]}",
                f"\nExtracted:\n{extracted_responses[0]}"
            )
        PRINTED_TIMES += 1

        for guess, true_answer in zip(extracted_responses, answer):
            if guess is None:
                scores.append(-2.5)
                continue
            # Convert to numbers
            try:
                true_answer = float(true_answer.strip())
                # Remove commas like in 123,456
                guess       = float(guess.strip().replace(",", ""))
                scores.append(3.5 if guess == true_answer else -1.5)
            except:
                scores.append(0)
                continue
        return scores

    tokenized = dataset.map(
        lambda x: {"tokens" : tokenizer.apply_chat_template(x["prompt"], add_generation_prompt = True, tokenize = True)},
        batched = True,
    )
    print(tokenizer.decode(tokenized[0]["tokens"]))
    tokenized = tokenized.map(lambda x: {"L" : len(x["tokens"])})

    import numpy as np
    maximum_length = int(np.quantile(tokenized["L"], 0.9))
    print("Max Length = ", maximum_length)

    # Filter only samples smaller than 90% max length
    dataset = dataset.select(np.where(np.array(tokenized["L"]) <= maximum_length)[0])
    del tokenized

    max_prompt_length = maximum_length + 1 # + 1 just in case!
    max_completion_length = max_seq_length - max_prompt_length

    from vllm import SamplingParams
    vllm_sampling_params = SamplingParams(
        min_p = 0.1,
        top_p = 1.0,
        top_k = -1,
        seed = 3407,
        stop = [tokenizer.eos_token],
        include_stop_str_in_output = True,
    )

    from trl import GRPOConfig, GRPOTrainer
    training_args = GRPOConfig(
        vllm_sampling_params = vllm_sampling_params,
        temperature = 1.0,
        learning_rate = 5e-6,
        weight_decay = 0.01,
        warmup_ratio = 0.1,
        lr_scheduler_type = "linear",
        optim = "adamw_8bit",
        logging_steps = 1,
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 1, # Increase to 4 for smoother training
        num_generations = 4, # Decrease if out of memory
        max_prompt_length = max_prompt_length,
        max_completion_length = max_completion_length,
        # num_train_epochs = 1, # Set to 1 for a full training run
        max_steps = 100,
        save_steps = 100,
        report_to = "none", # Can use Weights & Biases
        output_dir = "outputs",

        # For optional training + evaluation
        # fp16_full_eval = True,
        # per_device_eval_batch_size = 4,
        # eval_accumulation_steps = 1,
        # eval_strategy = "steps",
        # eval_steps = 1,
    )

    trainer = GRPOTrainer(
        model = model,
        processing_class = tokenizer,
        reward_funcs = [
            match_format_exactly,
            match_format_approximately,
            check_answer,
            check_numbers,
            format_and_language_reward_func,
        ],
        args = training_args,
        train_dataset = dataset,

        # For optional training + evaluation
        # train_dataset = new_dataset["train"],
        # eval_dataset = new_dataset["test"],
    )
    trainer.train()

🎯 Magistral Conversational Reasoning

  • Fine-tune Magistral-24B for advanced conversational reasoning (a minimal loading sketch follows below)
  • Magistral notebook: https://github.com/unslothai/notebooks/blob/main/nb/Magistral_(24B)-Reasoning-Conversational.ipynb
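
A minimal loading sketch, not taken from this README; the repository name "unsloth/Magistral-Small-2506" and the settings below are assumptions, so see the notebook above for the exact configuration.

    :::python

    from unsloth import FastLanguageModel

    # Load Magistral Small (24B) in 4-bit for conversational-reasoning LoRA fine-tuning
    # (assumed repo name and hyperparameters).
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Magistral-Small-2506",
        max_seq_length = 2048,
        load_in_4bit = True,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        lora_alpha = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj"],
        use_gradient_checkpointing = "unsloth",
    )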

👁️ Gemma3 Vision Support

  • Fine-tune Gemma3 vision models for multimodal tasks (see the sketch below)
  • Gemma3 Vision notebook: https://github.com/unslothai/notebooks/blob/main/nb/Gemma3_(4B)-Vision.ipynb
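
A minimal sketch of the vision fine-tuning setup, not taken from this README; the repository name "unsloth/gemma-3-4b-it" and the layer-selection flags are assumptions based on Unsloth's FastVisionModel API.

    :::python

    from unsloth import FastVisionModel

    # Load Gemma 3 4B with its vision tower in 4-bit (assumed repo name).
    model, tokenizer = FastVisionModel.from_pretrained(
        "unsloth/gemma-3-4b-it",
        load_in_4bit = True,
    )

    # Choose which parts of the model receive LoRA adapters.
    model = FastVisionModel.get_peft_model(
        model,
        finetune_vision_layers   = True,  # Tune the vision encoder
        finetune_language_layers = True,  # Tune the language model
        r = 16,
        lora_alpha = 16,
    )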

Documentation & Guides

What's Changed

New Contributors

Full Changelog: https://github.com/unslothai/unsloth/compare/May-2025...June-2025
