csm-1b

Running

App Files Files Community

drewThomasson committed on Mar 17

Commit

7dcf55b

verified ·

1 Parent(s): 5a597ea

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -30

app.py CHANGED Viewed

@@ -8,14 +8,11 @@ import spaces
 import torch
 import torchaudio
 from generator import Segment, load_csm_1b
-from huggingface_hub import hf_hub_download, login
 from watermarking import watermark
-api_key = os.getenv("HF_TOKEN")
 gpu_timeout = int(os.getenv("GPU_TIMEOUT", 60))
-CSM_1B_HF_WATERMARK = list(map(int, os.getenv("WATERMARK_KEY").split(" ")))
-login(token=api_key)
 SPACE_INTRO_TEXT = """\
 # Sesame CSM 1B
@@ -24,12 +21,6 @@ Generate from CSM 1B (Conversational Speech Model).
 Code is available on GitHub: [SesameAILabs/csm](https://github.com/SesameAILabs/csm).
 Checkpoint is [hosted on HuggingFace](https://huggingface.co/sesame/csm-1b).
-Try out our interactive demo [sesame.com/voicedemo](https://www.sesame.com/voicedemo),
-this uses a fine-tuned variant of CSM.
-The model has some capacity for non-English languages due to data contamination in the training
-data, but it is likely not to perform well.
 ---
 """
@@ -87,20 +78,6 @@ SPEAKER_PROMPTS = {
         ),
         "audio": "prompts/read_speech_b.wav",
     },
-    "read_speech_c": {
-        "text": (
-            "All passed so quickly, there was so much going on around him, the Tree quite forgot "
-            "to look to himself."
-        ),
-        "audio": "prompts/read_speech_c.wav",
-    },
-    "read_speech_d": {
-        "text": (
-            "Suddenly I was back in the old days Before you felt we ought to drift apart. It was "
-            "some trick-the way your eyebrows raise."
-        ),
-        "audio": "prompts/read_speech_d.wav",
-    },
 }
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -156,7 +133,7 @@ def infer(
     audio_prompt_speaker_b,
     gen_conversation_input,
 ) -> tuple[np.ndarray, int]:
-    # Estimate token limit, otherwise failure might happen after many utterances have been generated.
     if len(gen_conversation_input.strip() + text_prompt_speaker_a.strip() + text_prompt_speaker_b.strip()) >= 2000:
         raise gr.Error("Prompts and conversation too long.", duration=30)
@@ -202,10 +179,7 @@ def _infer(
     audio_tensors = [segment.audio for segment in generated_segments]
     audio_tensor = torch.cat(audio_tensors, dim=0)
-    # This applies an imperceptible watermark to identify audio as AI-generated.
-    # Watermarking ensures transparency, dissuades misuse, and enables traceability.
-    # Please be a responsible AI citizen and keep the watermarking in place.
-    # If using CSM 1B in another application, use your own private key and keep it secret.
     audio_tensor, wm_sample_rate = watermark(
         generator._watermarker, audio_tensor, generator.sample_rate, CSM_1B_HF_WATERMARK
     )

 import torch
 import torchaudio
 from generator import Segment, load_csm_1b
 from watermarking import watermark
+# Simplified environment variables handling
 gpu_timeout = int(os.getenv("GPU_TIMEOUT", 60))
+CSM_1B_HF_WATERMARK = list(map(int, os.getenv("WATERMARK_KEY", "0 0 0").split(" ")))
 SPACE_INTRO_TEXT = """\
 # Sesame CSM 1B
 Code is available on GitHub: [SesameAILabs/csm](https://github.com/SesameAILabs/csm).
 Checkpoint is [hosted on HuggingFace](https://huggingface.co/sesame/csm-1b).
 ---
 """
         ),
         "audio": "prompts/read_speech_b.wav",
     },
 }
 device = "cuda" if torch.cuda.is_available() else "cpu"
     audio_prompt_speaker_b,
     gen_conversation_input,
 ) -> tuple[np.ndarray, int]:
+    # Estimate token limit
     if len(gen_conversation_input.strip() + text_prompt_speaker_a.strip() + text_prompt_speaker_b.strip()) >= 2000:
         raise gr.Error("Prompts and conversation too long.", duration=30)
     audio_tensors = [segment.audio for segment in generated_segments]
     audio_tensor = torch.cat(audio_tensors, dim=0)
+    # Watermarking
     audio_tensor, wm_sample_rate = watermark(
         generator._watermarker, audio_tensor, generator.sample_rate, CSM_1B_HF_WATERMARK
     )