tahirsher committed on
Commit
6027573
·
verified ·
1 Parent(s): 15b7647

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -16
app.py CHANGED
@@ -1,9 +1,10 @@
1
  import os
2
  import torch
3
  import torchaudio
 
4
  import streamlit as st
5
  from huggingface_hub import login
6
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
7
 
8
  # ================================
9
  # 1️⃣ Authenticate with Hugging Face Hub (Securely)
@@ -18,9 +19,9 @@ login(token=HF_TOKEN)
18
  # ================================
19
  # 2️⃣ Load Conformer Model & Processor
20
  # ================================
21
- MODEL_NAME = "facebook/wav2vec2-conformer-rel-pos-large"
22
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
23
- model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME)
24
 
25
  device = "cuda" if torch.cuda.is_available() else "cpu"
26
  model.to(device)
@@ -47,25 +48,23 @@ if audio_file:
47
  with open(audio_path, "wb") as f:
48
  f.write(audio_file.read())
49
 
50
- waveform, sample_rate = torchaudio.load(audio_path)
51
- waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
52
- waveform = waveform.to(dtype=torch.float32)
53
 
54
  # Simulate an adversarial attack by injecting random noise
55
- adversarial_waveform = waveform + (attack_strength * torch.randn_like(waveform))
56
- adversarial_waveform = torch.clamp(adversarial_waveform, -1.0, 1.0)
57
 
58
- inputs = processor(adversarial_waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt")
59
- input_features = inputs.input_values.to(device)
60
- attention_mask = inputs.attention_mask.to(device) if "attention_mask" in inputs else None
61
 
62
- with torch.inference_mode():
63
- generated_ids = model.generate(input_features, max_length=200, num_beams=2, do_sample=False, use_cache=True,
64
- attention_mask=attention_mask)
65
- transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
66
 
67
  if attack_strength > 0.1:
68
  st.warning("⚠️ Adversarial attack detected! Transcription may be affected.")
69
 
70
  st.success("📄 Secure Transcription:")
71
- st.write(transcription)
 
1
  import os
2
  import torch
3
  import torchaudio
4
+ import librosa
5
  import streamlit as st
6
  from huggingface_hub import login
7
+ from transformers import AutoProcessor, AutoModelForCTC
8
 
9
  # ================================
10
  # 1️⃣ Authenticate with Hugging Face Hub (Securely)
 
19
  # ================================
20
  # 2️⃣ Load Conformer Model & Processor
21
  # ================================
22
+ MODEL_NAME = "deepl-project/conformer-finetunning"
23
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
24
+ model = AutoModelForCTC.from_pretrained(MODEL_NAME)
25
 
26
  device = "cuda" if torch.cuda.is_available() else "cpu"
27
  model.to(device)
 
48
  with open(audio_path, "wb") as f:
49
  f.write(audio_file.read())
50
 
51
+ speech, sr = librosa.load(audio_path, sr=16000)
 
 
52
 
53
  # Simulate an adversarial attack by injecting random noise
54
+ adversarial_speech = speech + (attack_strength * np.random.randn(*speech.shape))
55
+ adversarial_speech = np.clip(adversarial_speech, -1.0, 1.0)
56
 
57
+ inputs = processor(adversarial_speech, sampling_rate=sr, return_tensors="pt", padding=True)
58
+ input_values = inputs.input_values.to(device)
 
59
 
60
+ with torch.no_grad():
61
+ logits = model(input_values).logits
62
+
63
+ predicted_ids = torch.argmax(logits, dim=-1)
64
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
65
 
66
  if attack_strength > 0.1:
67
  st.warning("⚠️ Adversarial attack detected! Transcription may be affected.")
68
 
69
  st.success("📄 Secure Transcription:")
70
+ st.write(transcription[0])