huggingface · gaurav-init · Jun 21, 2026
diff --git a/docs/source/en/api/pipelines/animatediff.md b/docs/source/en/api/pipelines/animatediff.md
@@ -172,7 +172,7 @@ Here are some sample outputs:
         <td align="center">
           raccoon playing a guitar
           <br />
-          <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif" alt="racoon playing a guitar" />
+          <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif" alt="raccoon playing a guitar" />
         </td>
         <td align="center">
           a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality
@@ -491,7 +491,7 @@ Here are some sample outputs:
           raccoon playing a guitar
           <br />
           <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif"
-              alt="racoon playing a guitar"
+              alt="raccoon playing a guitar"
               style="width: 300px;" />
         </td>
         <td align=center>

diff --git a/docs/source/en/api/pipelines/bria_fibo.md b/docs/source/en/api/pipelines/bria_fibo.md
@@ -16,7 +16,7 @@ Text-to-image models have mastered imagination - but not control. FIBO changes t
 
 FIBO is trained on structured JSON captions up to 1,000+ words and designed to understand and control different visual parameters such as lighting, composition, color, and camera settings, enabling precise and reproducible outputs.
 
-With only 8 billion parameters, FIBO provides a new level of image quality, prompt adherence and proffesional control.
+With only 8 billion parameters, FIBO provides a new level of image quality, prompt adherence and professional control.
 
 FIBO is trained exclusively on a structured prompt and will not work with freeform text prompts.
 you can use the [FIBO-VLM-prompt-to-JSON](https://huggingface.co/briaai/FIBO-VLM-prompt-to-JSON) model or the [FIBO-gemini-prompt-to-JSON](https://huggingface.co/briaai/FIBO-gemini-prompt-to-JSON)  to convert your freeform text prompt to a structured JSON prompt.

diff --git a/docs/source/en/api/pipelines/kandinsky5_video.md b/docs/source/en/api/pipelines/kandinsky5_video.md
@@ -54,7 +54,7 @@ Kandinsky 5.0 T2V Lite:
 ### Basic Text-to-Video Generation
 
 #### Pro
-**⚠️ Warning!** all Pro models should be infered with pipeline.enable_model_cpu_offload()  
+**⚠️ Warning!** All Pro models should be run with `pipeline.enable_model_cpu_offload()`  
 ```python
 import torch
 from diffusers import Kandinsky5T2VPipeline
@@ -65,7 +65,7 @@ model_id = "kandinskylab/Kandinsky-5.0-T2V-Pro-sft-5s-Diffusers"
 pipe = Kandinsky5T2VPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
 
 pipe = pipe.to("cuda")
-pipeline.transformer.set_attention_backend("flex")                            # <--- Set attention bakend to Flex
+pipeline.transformer.set_attention_backend("flex")                            # <--- Set attention backend to Flex
 pipeline.enable_model_cpu_offload()                                           # <--- Enable cpu offloading for single GPU inference
 pipeline.transformer.compile(mode="max-autotune-no-cudagraphs", dynamic=True) # <--- Compile with max-autotune-no-cudagraphs
 
@@ -126,7 +126,7 @@ pipe = pipe.to("cuda")
 
 pipe.transformer.set_attention_backend(
     "flex"
-)                                       # <--- Set attention bakend to Flex
+)                                       # <--- Set attention backend to Flex
 pipe.transformer.compile(
     mode="max-autotune-no-cudagraphs", 
     dynamic=True
@@ -149,7 +149,7 @@ export_to_video(output, "output.mp4", fps=24, quality=9)
 ```
 
 ### Diffusion Distilled model
-**⚠️ Warning!** all nocfg and diffusion distilled models should be infered wothout CFG (```guidance_scale=1.0```):
+**⚠️ Warning!** All nocfg and diffusion distilled models should be run without CFG (```guidance_scale=1.0```):
 
 ```python
 model_id = "kandinskylab/Kandinsky-5.0-T2V-Lite-distilled16steps-5s-Diffusers"
@@ -167,7 +167,7 @@ export_to_video(output, "output.mp4", fps=24, quality=9)
 
 
 ### Basic Image-to-Video Generation
-**⚠️ Warning!** all Pro models should be infered with pipeline.enable_model_cpu_offload()  
+**⚠️ Warning!** All Pro models should be run with `pipeline.enable_model_cpu_offload()`  
 ```python
 import torch
 from diffusers import Kandinsky5T2VPipeline
@@ -178,7 +178,7 @@ model_id = "kandinskylab/Kandinsky-5.0-I2V-Pro-sft-5s-Diffusers"
 pipe = Kandinsky5T2VPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
 
 pipe = pipe.to("cuda")
-pipeline.transformer.set_attention_backend("flex")                            # <--- Set attention bakend to Flex
+pipeline.transformer.set_attention_backend("flex")                            # <--- Set attention backend to Flex
 pipeline.enable_model_cpu_offload()                                           # <--- Enable cpu offloading for single GPU inference
 pipeline.transformer.compile(mode="max-autotune-no-cudagraphs", dynamic=True) # <--- Compile with max-autotune-no-cudagraphs
 

diff --git a/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md b/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md
@@ -75,14 +75,14 @@ import torch
 pipeline_text2image = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16")
 pipeline_text2image = pipeline_text2image.to("cuda")
 
-prompt = "A cinematic shot of a baby racoon wearing an intricate italian priest robe."
+prompt = "A cinematic shot of a baby raccoon wearing an intricate italian priest robe."
 
 image = pipeline_text2image(prompt=prompt, guidance_scale=0.0, num_inference_steps=1).images[0]
 image
 ```
 
 <div class="flex justify-center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/sdxl-turbo-text2img.png" alt="generated image of a racoon in a robe"/>
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/sdxl-turbo-text2img.png" alt="generated image of a raccoon in a robe"/>
 </div>
 
 ## Image-to-image

diff --git a/docs/source/en/api/pipelines/wan.md b/docs/source/en/api/pipelines/wan.md
@@ -531,7 +531,7 @@ export_to_video(output, "animated_advanced.mp4", fps=30)
 
 - Try lower `shift` values (`2.0` to `5.0`) for lower resolution videos and higher `shift` values (`7.0` to `12.0`) for higher resolution images.
 
-- Wan 2.1 and 2.2 support using [LightX2V LoRAs](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v) to speed up inference. Using them on Wan 2.2 is slightly more involed. Refer to [this code snippet](https://github.com/huggingface/diffusers/pull/12040#issuecomment-3144185272) to learn more.
+- Wan 2.1 and 2.2 support using [LightX2V LoRAs](https://huggingface.co/Kijai/WanVideo_comfy/tree/main/Lightx2v) to speed up inference. Using them on Wan 2.2 is slightly more involved. Refer to [this code snippet](https://github.com/huggingface/diffusers/pull/12040#issuecomment-3144185272) to learn more.
 
 - Wan 2.2 has two denoisers. By default, LoRAs are only loaded into the first denoiser. One can set `load_into_transformer_2=True` to load LoRAs into the second denoiser. Refer to [this](https://github.com/huggingface/diffusers/pull/12074#issue-3292620048) and [this](https://github.com/huggingface/diffusers/pull/12074#issuecomment-3155896144) examples to learn more.
 

diff --git a/src/diffusers/pipelines/bria/pipeline_bria.py b/src/diffusers/pipelines/bria/pipeline_bria.py
@@ -604,7 +604,7 @@ def __call__(
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
 
         # 5. Prepare latent variables
-        num_channels_latents = self.transformer.config.in_channels // 4  # due to patch=2, we devide by 4
+        num_channels_latents = self.transformer.config.in_channels // 4  # due to patch=2, we divide by 4
         latents, latent_image_ids = self.prepare_latents(
             batch_size * num_images_per_prompt,
             num_channels_latents,

diff --git a/...nes/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py b/...nes/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py
@@ -320,10 +320,10 @@ def __call__(
         Args:
             prompt (`str` or `list[str]`):
                 The prompt or prompts to guide the image generation.
-            image (`nd.ndarray` or `PIL.Image.Image`):
+            image (`np.ndarray` or `PIL.Image.Image`):
                 `Image`, or tensor representing an image batch, that will be used as the starting point for the
                 process. This is the image whose masked region will be inpainted.
-            mask_image (`nd.ndarray` or `PIL.Image.Image`):
+            mask_image (`np.ndarray` or `PIL.Image.Image`):
                 `Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be
                 replaced by noise and therefore repainted, while black pixels will be preserved. If `mask_image` is a
                 PIL image, it will be converted to a single channel (luminance) before use. If it's a tensor, it should

diff --git a/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py b/src/diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py
@@ -150,7 +150,7 @@ def prepare_mask_and_masked_image(image, mask, height, width):
         ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
         should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
         TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
-            (ot the other way around).
+            (or the other way around).
 
     Returns:
         tuple[torch.Tensor]: The pair (mask, image) as ``torch.Tensor`` with 4

diff --git a/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py b/src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py
@@ -148,7 +148,7 @@ def prepare_mask_and_masked_image(image, mask, height, width):
         ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
         should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
         TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
-            (ot the other way around).
+            (or the other way around).
 
     Returns:
         tuple[torch.Tensor]: The pair (mask, image) as ``torch.Tensor`` with 4

diff --git a/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py b/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py
@@ -223,7 +223,7 @@ def check_inputs(
                     f"got {type(task_prompt)} and {type(content_prompt)}"
                 )
             if len(content_prompt) != len(task_prompt):
-                raise ValueError("`task_prompt` and `content_prompt` must have the same length whe they are lists.")
+                raise ValueError("`task_prompt` and `content_prompt` must have the same length when they are lists.")
 
             for sample in image:
                 if not isinstance(sample, list) or not isinstance(sample[0], list):

diff --git a/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py b/src/diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py
@@ -443,7 +443,7 @@ def check_inputs(
                     f"got {type(task_prompt)} and {type(content_prompt)}"
                 )
             if len(content_prompt) != len(task_prompt):
-                raise ValueError("`task_prompt` and `content_prompt` must have the same length whe they are lists.")
+                raise ValueError("`task_prompt` and `content_prompt` must have the same length when they are lists.")
 
             for sample in image:
                 if not isinstance(sample, list) or not isinstance(sample[0], list):

diff --git a/src/diffusers/quantizers/gguf/utils.py b/src/diffusers/quantizers/gguf/utils.py
@@ -518,7 +518,7 @@ def dequantize_gguf_tensor(tensor):
 
     block_size, type_size = GGML_QUANT_SIZES[quant_type]
 
-    # Conver to plain tensor to avoid unnecessary __torch_function__ overhead.
+    # Convert to plain tensor to avoid unnecessary __torch_function__ overhead.
     tensor = tensor.as_tensor()
 
     tensor = tensor.view(torch.uint8)