Browse Source

add_model_to_local

finalv3
ydw 2 months ago
parent
commit
2bb917884a
20 changed files with 960 additions and 5 deletions
  1. +2
    -2
      package-lock.json
  2. +1
    -1
      package.json
  3. BIN
      src-py/__pycache__/VoiceController.cpython-38.pyc
  4. BIN
      src-py/router/__pycache__/__init__.cpython-38.pyc
  5. BIN
      src-py/router/__pycache__/ws.cpython-38.pyc
  6. +243
    -2
      src-py/router/ws.py
  7. +6
    -0
      src-py/vosk-model-small-cn-0.22/README
  8. BIN
      src-py/vosk-model-small-cn-0.22/am/final.mdl
  9. +8
    -0
      src-py/vosk-model-small-cn-0.22/conf/mfcc.conf
  10. +10
    -0
      src-py/vosk-model-small-cn-0.22/conf/model.conf
  11. BIN
      src-py/vosk-model-small-cn-0.22/graph/Gr.fst
  12. BIN
      src-py/vosk-model-small-cn-0.22/graph/HCLr.fst
  13. +39
    -0
      src-py/vosk-model-small-cn-0.22/graph/disambig_tid.int
  14. +646
    -0
      src-py/vosk-model-small-cn-0.22/graph/phones/word_boundary.int
  15. BIN
      src-py/vosk-model-small-cn-0.22/ivector/final.dubm
  16. BIN
      src-py/vosk-model-small-cn-0.22/ivector/final.ie
  17. BIN
      src-py/vosk-model-small-cn-0.22/ivector/final.mat
  18. +3
    -0
      src-py/vosk-model-small-cn-0.22/ivector/global_cmvn.stats
  19. +0
    -0
      src-py/vosk-model-small-cn-0.22/ivector/online_cmvn.conf
  20. +2
    -0
      src-py/vosk-model-small-cn-0.22/ivector/splice.conf

+ 2
- 2
package-lock.json View File

@ -33,7 +33,7 @@
"naive-ui": "^2.41.0",
"sass-embedded": "^1.85.1",
"typescript": "~5.6.2",
"vite": "^6.0.3",
"vite": "^6.3.5",
"vitest": "^3.2.4",
"vue-tsc": "^2.1.10"
}
@ -4301,7 +4301,7 @@
},
"node_modules/vite": {
"version": "6.3.5",
"resolved": "https://registry.npmmirror.com/vite/-/vite-6.3.5.tgz",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.3.5.tgz",
"integrity": "sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==",
"dev": true,
"license": "MIT",

+ 1
- 1
package.json View File

@ -43,7 +43,7 @@
"naive-ui": "^2.41.0",
"sass-embedded": "^1.85.1",
"typescript": "~5.6.2",
"vite": "^6.0.3",
"vite": "^6.3.5",
"vitest": "^3.2.4",
"vue-tsc": "^2.1.10"
}

BIN
src-py/__pycache__/VoiceController.cpython-38.pyc View File


BIN
src-py/router/__pycache__/__init__.cpython-38.pyc View File


BIN
src-py/router/__pycache__/ws.cpython-38.pyc View File


+ 243
- 2
src-py/router/ws.py View File

@ -1,4 +1,16 @@
import json
import tkinter as tk
import threading
import queue
import time
import json
import sounddevice as sd
import numpy as np
from vosk import Model, KaldiRecognizer
import os
import platform
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
@ -168,8 +180,210 @@ class GestureHandler:
self.keyboard.release(key)
except Exception as e:
print(f"执行按键失败: {e}")
# 字幕控制
class VoiceSubtitleApp:
    """Borderless, always-on-top Tk overlay that shows live Vosk subtitles.

    Two daemon threads do the work: ``process_audio`` keeps a sounddevice
    input stream open and ticks the fade-out timer, while
    ``recognize_speech`` drains the audio queue through a Vosk
    ``KaldiRecognizer``. Both threads touch widgets only through callbacks
    queued with ``root.after``.
    """

    SAMPLE_RATE = 16000        # Hz, passed to both the stream and the recognizer
    BLOCK_SIZE = 4000          # frames handed to the audio callback per block
    POLL_INTERVAL = 0.05       # seconds between fade ticks
    FADE_DELAY = 3             # seconds without speech before fading starts
    FADE_STEP = 15             # grey-level decrement applied per fade tick
    MEDIUM_MODEL_DIR = "vosk-model-cn-0.22"
    SMALL_MODEL_DIR = "vosk-model-small-cn-0.22"

    def __init__(self, root):
        """Build the overlay on ``root`` and start recognition.

        Args:
            root: the Tk root window that hosts the subtitle label.

        Initialization failures (missing model, no input device, ...) are
        reported on stdout and in the subtitle label instead of being raised.
        """
        self.root = root
        self.root.title("实时语音字幕")
        self._configure_window()
        self._build_label()

        self.is_running = True
        self.audio_queue = queue.Queue()
        self.partial_result = ""
        self.last_voice_time = time.time()

        self._start_recognition()

    def _configure_window(self):
        """Make the window topmost, black and borderless near the screen bottom."""
        if platform.system() == 'Darwin':  # macOS
            self.root.attributes('-topmost', 1)
            self.root.attributes('-alpha', 1.0)
            self.root.attributes('-transparent', True)
            self.root.configure(bg='black')
            self.root.update_idletasks()
            self.root.lift()
        else:  # Windows and everything else
            self.root.attributes('-topmost', True)
            self.root.attributes('-alpha', 1.0)
            self.root.configure(bg='black')
        self.root.overrideredirect(True)  # no title bar / border

        # Horizontally centred, 100 px above the bottom edge of the screen.
        self.window_width = 800
        self.window_height = 100
        screen_width = self.root.winfo_screenwidth()
        screen_height = self.root.winfo_screenheight()
        x = (screen_width - self.window_width) // 2
        y = screen_height - self.window_height - 100
        self.root.geometry(f"{self.window_width}x{self.window_height}+{x}+{y}")

    def _build_label(self):
        """Create the subtitle label and wire up drag / right-click-to-close."""
        self.text_label = tk.Label(
            self.root,
            text="",  # nothing is shown until speech is recognized
            font=("Arial", 24, "bold"),
            fg="white",
            bg='black',
            wraplength=780,
            highlightthickness=0,
            borderwidth=0,
        )
        self.text_label.pack(expand=True, fill='both', padx=10)
        # The window has no title bar, so dragging is implemented on the label.
        self.text_label.bind('<Button-1>', self.start_move)
        self.text_label.bind('<B1-Motion>', self.on_move)
        self.text_label.bind('<Button-3>', lambda e: self.on_closing())

    @classmethod
    def _find_model_path(cls) -> Optional[str]:
        """Return the first existing Vosk model directory, or ``None``.

        The working-directory-relative locations are tried first (medium
        model, then small model); the same two names are then tried in the
        directory above this module's directory, so the lookup no longer
        depends solely on where the process was started.
        """
        module_parent = os.path.normpath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
        )
        candidates = [
            f"../{cls.MEDIUM_MODEL_DIR}",
            f"../{cls.SMALL_MODEL_DIR}",
            os.path.join(module_parent, cls.MEDIUM_MODEL_DIR),
            os.path.join(module_parent, cls.SMALL_MODEL_DIR),
        ]
        for candidate in candidates:
            if os.path.exists(candidate):
                return candidate
        return None

    def _start_recognition(self):
        """Load the model, list audio devices and start both worker threads."""
        try:
            print("正在加载语音识别模型...")
            model_path = self._find_model_path()
            if model_path is None:
                missing = f"../{self.SMALL_MODEL_DIR}"
                print(f"错误:找不到模型文件夹 {missing}")
                self.update_subtitle(f"错误:找不到模型文件夹 {missing}")
                return
            if os.path.basename(model_path) == self.SMALL_MODEL_DIR:
                print("未找到中型模型,使用小型模型")

            print(f"使用模型: {model_path}")
            self.model = Model(model_path)
            self.recognizer = KaldiRecognizer(self.model, self.SAMPLE_RATE)
            self.recognizer.SetMaxAlternatives(0)
            self.recognizer.SetWords(True)
            print("模型加载完成")

            print("可用的音频设备:")
            for i, device in enumerate(sd.query_devices()):
                print(f"{i}: {device['name']}")

            default_input = sd.query_devices(kind='input')
            print(f"使用默认输入设备: {default_input['name']}")

            self.audio_thread = threading.Thread(target=self.process_audio)
            self.audio_thread.daemon = True
            self.audio_thread.start()

            self.recognition_thread = threading.Thread(target=self.recognize_speech)
            self.recognition_thread.daemon = True
            self.recognition_thread.start()
        except Exception as e:
            print(f"初始化错误: {str(e)}")
            self.update_subtitle(f"初始化失败: {str(e)}")

    def _post_to_ui(self, callback, *args):
        """Queue ``callback(*args)`` on the Tk event loop from a worker thread.

        Nothing is queued once ``on_closing`` has run, because calling
        ``after`` on a destroyed root raises.
        """
        if not self.is_running:
            return
        try:
            self.root.after(0, callback, *args)
        except (RuntimeError, tk.TclError):
            # The window was destroyed between the check and the call; the
            # worker loops stop on their next ``is_running`` test.
            pass

    def start_move(self, event):
        """Remember where inside the label a drag started."""
        self.x = event.x
        self.y = event.y

    def on_move(self, event):
        """Move the window by the pointer offset since ``start_move``."""
        deltax = event.x - self.x
        deltay = event.y - self.y
        x = self.root.winfo_x() + deltax
        y = self.root.winfo_y() + deltay
        self.root.geometry(f"+{x}+{y}")

    def audio_callback(self, indata, frames, time, status):
        """sounddevice callback: report ``status`` and enqueue the raw block."""
        if status:
            print(status)
        self.audio_queue.put(bytes(indata))

    def process_audio(self):
        """Keep the input stream open and tick the fade-out until closed."""
        try:
            with sd.RawInputStream(samplerate=self.SAMPLE_RATE, channels=1,
                                   dtype='int16',
                                   blocksize=self.BLOCK_SIZE,
                                   device=None,
                                   callback=self.audio_callback):
                print("开始录音...")
                while self.is_running:
                    time.sleep(self.POLL_INTERVAL)
                    # This loop is the only driver of the fade animation.
                    self._post_to_ui(self.fade_out_text)
        except Exception as e:
            print(f"音频处理错误: {str(e)}")
            self._post_to_ui(self.update_subtitle, f"音频处理错误: {str(e)}")

    def recognize_speech(self):
        """Feed queued audio to the recognizer and publish partial/final text."""
        print("开始识别...")
        while self.is_running:
            try:
                audio_data = self.audio_queue.get(timeout=0.5)
            except queue.Empty:
                continue
            if not audio_data:
                continue

            try:
                if self.recognizer.AcceptWaveform(audio_data):
                    result = json.loads(self.recognizer.Result())
                    text = result.get("text", "").strip()
                    # A final result ends the utterance: forget the last
                    # partial so the next utterance is shown even when it
                    # starts with the same words.
                    self.partial_result = ""
                    if text:
                        print(f"最终结果: {text}")
                        self.last_voice_time = time.time()
                        self._post_to_ui(self.update_subtitle, text)
                else:
                    partial = json.loads(self.recognizer.PartialResult())
                    partial_text = partial.get("partial", "").strip()
                    if partial_text and partial_text != self.partial_result:
                        self.partial_result = partial_text
                        print(f"部分结果: {partial_text}")
                        self.last_voice_time = time.time()
                        self._post_to_ui(self.update_subtitle, partial_text)
            except Exception as e:
                print(f"识别错误: {str(e)}")
                time.sleep(0.1)

    def fade_out_text(self):
        """Apply one fade step once ``FADE_DELAY`` seconds passed without speech.

        The label colour is darkened by ``FADE_STEP`` grey levels per call and
        the text is cleared when the level reaches zero. The method does not
        reschedule itself; ``process_audio`` calls it once per poll interval.
        """
        try:
            if time.time() - self.last_voice_time <= self.FADE_DELAY:
                return
            if not self.text_label.cget('text'):
                return  # already cleared, nothing left to fade

            current_color = str(self.text_label.cget('fg'))
            if current_color == 'white':
                # Switch to hex notation so later ticks can do arithmetic on it.
                self.text_label.configure(fg='#FFFFFF')
                return

            color = current_color.lstrip('#')
            if len(color) != 6:
                return  # not a #rrggbb value; leave the label alone

            level = max(0, int(color[0:2], 16) - self.FADE_STEP)
            if level > 0:
                self.text_label.configure(fg=f'#{level:02x}{level:02x}{level:02x}')
            else:
                self.text_label.config(text="")
                self.text_label.update_idletasks()
        except Exception as e:
            print(f"淡化效果错误: {str(e)}")

    def update_subtitle(self, text):
        """Show ``text`` at full brightness and restart the fade timer.

        Empty text is ignored so the previous subtitle stays visible.
        """
        if not text:
            return
        self.text_label.config(text=text)
        self.text_label.configure(fg='white')
        # Redraw without re-entering the event loop from inside a callback.
        self.text_label.update_idletasks()
        self.last_voice_time = time.time()

    def on_closing(self):
        """Stop the worker loops and destroy the window."""
        self.is_running = False
        self.root.destroy()
class VoiceHandler:
"""语音处理控制器"""
@ -177,6 +391,10 @@ class VoiceHandler:
from VoiceController import VoiceController
self.controller: Optional[VoiceController] = None
self.root: Optional[tk.Tk] = None # 保存Tkinter主窗口
self.app: Optional[VoiceSubtitleApp] = None # 保存字幕应用实例
self.tk_thread: Optional[threading.Thread] = None # 保存Tkinter线程
try:
self.controller = VoiceController()
except Exception as e:
@ -184,21 +402,44 @@ class VoiceHandler:
async def start_recording(self, websocket: WebSocket) -> None:
"""Start recording and open the live subtitle window.

``websocket`` is not used by the statements in this method.
"""
# Close a subtitle window left over from an earlier call before a new one
# is created.
if self.app:
self.root.after(0, self.app.on_closing) # queue the close on the Tk event loop
self.app = None
self.root = None
if self.controller and not self.controller.is_recording:
self.controller.start_record_thread()
# Thread target: creates the Tk root and the subtitle app, then blocks in
# the Tk event loop.
def run_tk_app():
self.root = tk.Tk()
self.app = VoiceSubtitleApp(self.root)
self.root.mainloop() # the Tk event loop runs in this worker thread
self.tk_thread = threading.Thread(target=run_tk_app, daemon=True)
self.tk_thread.start()
async def stop_recording(
self, websocket: WebSocket, gesture_handler: GestureHandler
) -> None:
"""Stop recording, type the transcription and close the subtitle window.

``websocket`` is not used by the statements in this method.
"""
if self.controller and self.controller.is_recording:
self.controller.stop_record()
# Fetch the recognition result and type it through the keyboard controller.
text = self.controller.transcribe_audio()
if text:
gesture_handler.keyboard.type(text)
# gesture_handler.keyboard.tap(Key.enter)
if self.app and self.root:
# Queue the close on the Tk event loop: Tkinter calls must run in the
# thread that created the window.
self.root.after(0, self.app.on_closing)
# Drop the references, then optionally wait for the Tk thread to finish.
self.app = None
self.root = None
if self.tk_thread and self.tk_thread.is_alive():
self.tk_thread.join(timeout=1.0) # wait at most one second
@router.websocket("/ws_wavecontrol")

+ 6
- 0
src-py/vosk-model-small-cn-0.22/README View File

@ -0,0 +1,6 @@
Chinese Vosk model for mobile
CER results
23.54% speechio_02
38.29% speechio_06

BIN
src-py/vosk-model-small-cn-0.22/am/final.mdl View File


+ 8
- 0
src-py/vosk-model-small-cn-0.22/conf/mfcc.conf View File

@ -0,0 +1,8 @@
--use-energy=false
--sample-frequency=16000
--num-mel-bins=40
--num-ceps=40
--low-freq=40
--high-freq=-200
--allow-upsample=true
--allow-downsample=true

+ 10
- 0
src-py/vosk-model-small-cn-0.22/conf/model.conf View File

@ -0,0 +1,10 @@
--min-active=200
--max-active=5000
--beam=12.0
--lattice-beam=4.0
--acoustic-scale=1.0
--frame-subsampling-factor=3
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
--endpoint.rule2.min-trailing-silence=0.5
--endpoint.rule3.min-trailing-silence=1.0
--endpoint.rule4.min-trailing-silence=2.0

BIN
src-py/vosk-model-small-cn-0.22/graph/Gr.fst View File


BIN
src-py/vosk-model-small-cn-0.22/graph/HCLr.fst View File


+ 39
- 0
src-py/vosk-model-small-cn-0.22/graph/disambig_tid.int View File

@ -0,0 +1,39 @@
11845
11846
11847
11848
11849
11850
11851
11852
11853
11854
11855
11856
11857
11858
11859
11860
11861
11862
11863
11864
11865
11866
11867
11868
11869
11870
11871
11872
11873
11874
11875
11876
11877
11878
11879
11880
11881
11882
11883

+ 646
- 0
src-py/vosk-model-small-cn-0.22/graph/phones/word_boundary.int View File

@ -0,0 +1,646 @@
1 nonword
2 begin
3 end
4 internal
5 singleton
6 nonword
7 begin
8 end
9 internal
10 singleton
11 begin
12 end
13 internal
14 singleton
15 begin
16 end
17 internal
18 singleton
19 begin
20 end
21 internal
22 singleton
23 begin
24 end
25 internal
26 singleton
27 begin
28 end
29 internal
30 singleton
31 begin
32 end
33 internal
34 singleton
35 begin
36 end
37 internal
38 singleton
39 begin
40 end
41 internal
42 singleton
43 begin
44 end
45 internal
46 singleton
47 begin
48 end
49 internal
50 singleton
51 begin
52 end
53 internal
54 singleton
55 begin
56 end
57 internal
58 singleton
59 begin
60 end
61 internal
62 singleton
63 begin
64 end
65 internal
66 singleton
67 begin
68 end
69 internal
70 singleton
71 begin
72 end
73 internal
74 singleton
75 begin
76 end
77 internal
78 singleton
79 begin
80 end
81 internal
82 singleton
83 begin
84 end
85 internal
86 singleton
87 begin
88 end
89 internal
90 singleton
91 begin
92 end
93 internal
94 singleton
95 begin
96 end
97 internal
98 singleton
99 begin
100 end
101 internal
102 singleton
103 begin
104 end
105 internal
106 singleton
107 begin
108 end
109 internal
110 singleton
111 begin
112 end
113 internal
114 singleton
115 begin
116 end
117 internal
118 singleton
119 begin
120 end
121 internal
122 singleton
123 begin
124 end
125 internal
126 singleton
127 begin
128 end
129 internal
130 singleton
131 begin
132 end
133 internal
134 singleton
135 begin
136 end
137 internal
138 singleton
139 begin
140 end
141 internal
142 singleton
143 begin
144 end
145 internal
146 singleton
147 begin
148 end
149 internal
150 singleton
151 begin
152 end
153 internal
154 singleton
155 begin
156 end
157 internal
158 singleton
159 begin
160 end
161 internal
162 singleton
163 begin
164 end
165 internal
166 singleton
167 begin
168 end
169 internal
170 singleton
171 begin
172 end
173 internal
174 singleton
175 begin
176 end
177 internal
178 singleton
179 begin
180 end
181 internal
182 singleton
183 begin
184 end
185 internal
186 singleton
187 begin
188 end
189 internal
190 singleton
191 begin
192 end
193 internal
194 singleton
195 begin
196 end
197 internal
198 singleton
199 begin
200 end
201 internal
202 singleton
203 begin
204 end
205 internal
206 singleton
207 begin
208 end
209 internal
210 singleton
211 begin
212 end
213 internal
214 singleton
215 begin
216 end
217 internal
218 singleton
219 begin
220 end
221 internal
222 singleton
223 begin
224 end
225 internal
226 singleton
227 begin
228 end
229 internal
230 singleton
231 begin
232 end
233 internal
234 singleton
235 begin
236 end
237 internal
238 singleton
239 begin
240 end
241 internal
242 singleton
243 begin
244 end
245 internal
246 singleton
247 begin
248 end
249 internal
250 singleton
251 begin
252 end
253 internal
254 singleton
255 begin
256 end
257 internal
258 singleton
259 begin
260 end
261 internal
262 singleton
263 begin
264 end
265 internal
266 singleton
267 begin
268 end
269 internal
270 singleton
271 begin
272 end
273 internal
274 singleton
275 begin
276 end
277 internal
278 singleton
279 begin
280 end
281 internal
282 singleton
283 begin
284 end
285 internal
286 singleton
287 begin
288 end
289 internal
290 singleton
291 begin
292 end
293 internal
294 singleton
295 begin
296 end
297 internal
298 singleton
299 begin
300 end
301 internal
302 singleton
303 begin
304 end
305 internal
306 singleton
307 begin
308 end
309 internal
310 singleton
311 begin
312 end
313 internal
314 singleton
315 begin
316 end
317 internal
318 singleton
319 begin
320 end
321 internal
322 singleton
323 begin
324 end
325 internal
326 singleton
327 begin
328 end
329 internal
330 singleton
331 begin
332 end
333 internal
334 singleton
335 begin
336 end
337 internal
338 singleton
339 begin
340 end
341 internal
342 singleton
343 begin
344 end
345 internal
346 singleton
347 begin
348 end
349 internal
350 singleton
351 begin
352 end
353 internal
354 singleton
355 begin
356 end
357 internal
358 singleton
359 begin
360 end
361 internal
362 singleton
363 begin
364 end
365 internal
366 singleton
367 begin
368 end
369 internal
370 singleton
371 begin
372 end
373 internal
374 singleton
375 begin
376 end
377 internal
378 singleton
379 begin
380 end
381 internal
382 singleton
383 begin
384 end
385 internal
386 singleton
387 begin
388 end
389 internal
390 singleton
391 begin
392 end
393 internal
394 singleton
395 begin
396 end
397 internal
398 singleton
399 begin
400 end
401 internal
402 singleton
403 begin
404 end
405 internal
406 singleton
407 begin
408 end
409 internal
410 singleton
411 begin
412 end
413 internal
414 singleton
415 begin
416 end
417 internal
418 singleton
419 begin
420 end
421 internal
422 singleton
423 begin
424 end
425 internal
426 singleton
427 begin
428 end
429 internal
430 singleton
431 begin
432 end
433 internal
434 singleton
435 begin
436 end
437 internal
438 singleton
439 begin
440 end
441 internal
442 singleton
443 begin
444 end
445 internal
446 singleton
447 begin
448 end
449 internal
450 singleton
451 begin
452 end
453 internal
454 singleton
455 begin
456 end
457 internal
458 singleton
459 begin
460 end
461 internal
462 singleton
463 begin
464 end
465 internal
466 singleton
467 begin
468 end
469 internal
470 singleton
471 begin
472 end
473 internal
474 singleton
475 begin
476 end
477 internal
478 singleton
479 begin
480 end
481 internal
482 singleton
483 begin
484 end
485 internal
486 singleton
487 begin
488 end
489 internal
490 singleton
491 begin
492 end
493 internal
494 singleton
495 begin
496 end
497 internal
498 singleton
499 begin
500 end
501 internal
502 singleton
503 begin
504 end
505 internal
506 singleton
507 begin
508 end
509 internal
510 singleton
511 begin
512 end
513 internal
514 singleton
515 begin
516 end
517 internal
518 singleton
519 begin
520 end
521 internal
522 singleton
523 begin
524 end
525 internal
526 singleton
527 begin
528 end
529 internal
530 singleton
531 begin
532 end
533 internal
534 singleton
535 begin
536 end
537 internal
538 singleton
539 begin
540 end
541 internal
542 singleton
543 begin
544 end
545 internal
546 singleton
547 begin
548 end
549 internal
550 singleton
551 begin
552 end
553 internal
554 singleton
555 begin
556 end
557 internal
558 singleton
559 begin
560 end
561 internal
562 singleton
563 begin
564 end
565 internal
566 singleton
567 begin
568 end
569 internal
570 singleton
571 begin
572 end
573 internal
574 singleton
575 begin
576 end
577 internal
578 singleton
579 begin
580 end
581 internal
582 singleton
583 begin
584 end
585 internal
586 singleton
587 begin
588 end
589 internal
590 singleton
591 begin
592 end
593 internal
594 singleton
595 begin
596 end
597 internal
598 singleton
599 begin
600 end
601 internal
602 singleton
603 begin
604 end
605 internal
606 singleton
607 begin
608 end
609 internal
610 singleton
611 begin
612 end
613 internal
614 singleton
615 begin
616 end
617 internal
618 singleton
619 begin
620 end
621 internal
622 singleton
623 begin
624 end
625 internal
626 singleton
627 begin
628 end
629 internal
630 singleton
631 begin
632 end
633 internal
634 singleton
635 begin
636 end
637 internal
638 singleton
639 begin
640 end
641 internal
642 singleton
643 begin
644 end
645 internal
646 singleton

BIN
src-py/vosk-model-small-cn-0.22/ivector/final.dubm View File


BIN
src-py/vosk-model-small-cn-0.22/ivector/final.ie View File


BIN
src-py/vosk-model-small-cn-0.22/ivector/final.mat View File


+ 3
- 0
src-py/vosk-model-small-cn-0.22/ivector/global_cmvn.stats View File

@ -0,0 +1,3 @@
[
1.117107e+11 -7.827721e+08 -1.101398e+10 -2.193934e+09 -1.347332e+10 -1.613916e+10 -1.199561e+10 -1.255081e+10 -1.638895e+10 -3.821099e+09 -1.372833e+10 -5.244242e+09 -1.098187e+10 -3.655235e+09 -9.364579e+09 -4.285302e+09 -6.296873e+09 -1.552953e+09 -3.176746e+09 -1.202976e+08 -9.857023e+08 2.316555e+08 -1.61059e+08 -5.891868e+07 3.465849e+08 -1.842054e+08 3.248211e+08 -1.483965e+08 3.739239e+08 -6.672061e+08 4.442288e+08 -9.274889e+08 5.142684e+08 4.292036e+07 2.206386e+08 -4.532715e+08 -2.092499e+08 -3.70488e+08 -8.079404e+07 -8.425977e+07 1.344125e+09
9.982632e+12 1.02635e+12 8.634624e+11 9.06451e+11 9.652096e+11 1.12772e+12 9.468372e+11 9.141218e+11 9.670484e+11 6.936961e+11 8.141006e+11 6.256321e+11 6.087707e+11 4.616898e+11 4.212042e+11 2.862872e+11 2.498089e+11 1.470856e+11 1.099197e+11 5.780894e+10 3.118114e+10 1.060667e+10 1.466199e+09 4.173056e+08 5.257362e+09 1.277714e+10 2.114478e+10 2.974502e+10 3.587691e+10 4.078971e+10 4.247745e+10 4.382608e+10 4.62521e+10 4.575282e+10 3.546206e+10 3.041531e+10 2.838562e+10 2.258604e+10 1.715295e+10 1.303227e+10 0 ]

+ 0
- 0
src-py/vosk-model-small-cn-0.22/ivector/online_cmvn.conf View File


+ 2
- 0
src-py/vosk-model-small-cn-0.22/ivector/splice.conf View File

@ -0,0 +1,2 @@
--left-context=3
--right-context=3

Loading…
Cancel
Save