#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
MKV音频提取器
功能：选择MKV文件，列出音视频字幕组件，按字幕时间切割合并音频为output.mp3
"""

import os
import sys
import traceback
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import subprocess
import tempfile
import re
import multiprocessing
from multiprocessing import Manager, Queue
import time
from datetime import datetime

# 将工作函数定义为独立函数，避免引用Tkinter对象
def process_segments_worker(segments, temp_dir, start_idx, progress_dict, file_list_lock, temp_files, mkv_file, selected_audio, error_dict=None):
    """Cut a batch of audio segments out of an MKV file; runs in a worker process.

    Each segment is transcoded to ``segment_<global index>.mp3`` inside
    *temp_dir* with ffmpeg. A segment whose ffmpeg call fails or exceeds the
    60 second timeout still gets a file, but it only contains a short text
    marker (``error`` / ``timeout``).

    Args:
        segments: iterable of ``(start, end)`` pairs, numeric seconds.
        temp_dir: directory that receives the segment files.
        start_idx: global index of the first segment of this batch.
        progress_dict: shared mapping; its ``'completed'`` counter is
            incremented once per processed segment.
        file_list_lock: lock guarding *temp_files*, *progress_dict* and
            *error_dict*.
        temp_files: shared list that collects the produced file paths.
        mkv_file: path of the source MKV file.
        selected_audio: stream index of the audio track (``-map 0:<index>``).
        error_dict: optional shared mapping ``pid -> error text`` filled when
            the batch aborts with an unexpected exception.
    """
    try:
        for i, (start, end) in enumerate(segments):
            # Index of this segment among all segments of the whole job.
            global_index = start_idx + i

            print(f"[进程 {os.getpid()}] 开始处理片段 {global_index}: {start} 到 {end}")

            temp_file = os.path.join(temp_dir, f"segment_{global_index}.mp3")

            # "-ss" is given *before* "-i" (input seeking) so ffmpeg jumps
            # close to the segment instead of decoding the file from the
            # beginning for every segment. With input seeking the output
            # timestamps restart at zero, hence "-t <duration>" rather than
            # "-to <end>".
            cmd = [
                "ffmpeg",
                "-ss", str(start),
                "-i", mkv_file,
                "-map", f"0:{selected_audio}",
                "-t", str(end - start),
                "-vn",  # no video
                "-acodec", "mp3",  # transcode to MP3
                "-y",  # overwrite existing files
                temp_file
            ]

            # The timeout keeps one stuck ffmpeg call from blocking the batch.
            # Output is decoded as UTF-8 with replacement so that ffmpeg's
            # metadata dump cannot raise a decoding error on non-UTF-8 locales.
            try:
                subprocess.run(cmd, capture_output=True, encoding="utf-8",
                               errors="replace", check=True, timeout=60)
            except subprocess.TimeoutExpired:
                print(f"[进程 {os.getpid()}] 处理片段 {global_index} 超时")
                # Leave a tiny marker file so the failure is visible.
                with open(temp_file, 'w') as f:
                    f.write('timeout')
            except Exception as e:
                print(f"[进程 {os.getpid()}] 处理片段 {global_index} 失败: {str(e)}")
                # Leave a tiny marker file so the failure is visible.
                with open(temp_file, 'w') as f:
                    f.write('error')

            # "+=" on a shared mapping is a separate read and write, so the
            # counter must be updated under the same lock as the file list or
            # concurrent workers lose increments.
            with file_list_lock:
                temp_files.append(temp_file)
                progress_dict['completed'] += 1
                completed = progress_dict['completed']

            print(f"[进程 {os.getpid()}] 完成片段 {global_index}/{len(segments)}，累计完成: {completed}")
    except Exception as e:
        # A worker process cannot touch the UI; report through stdout and the
        # optional shared error mapping instead.
        error_msg = f"[进程 {os.getpid()}] 处理片段时出错: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)

        if error_dict is not None:
            with file_list_lock:
                error_dict[os.getpid()] = error_msg

class MKVAudioExtractor:
    def __init__(self, root):
        """Configure the main window, reset the extraction state and build the UI.

        Args:
            root: the Tk root window that hosts the application.
        """
        # Main window
        self.root = root
        root.title("MKV音频提取器")
        root.geometry("700x500")
        root.resizable(True, True)

        # Font used by the labels and listboxes (chosen for Chinese text)
        self.font = ("SimHei", 10)

        # Application state: chosen file, discovered tracks, current selection
        # and the subtitle time ranges parsed for the extraction.
        self.mkv_file = self.selected_audio = self.selected_subtitle = ""
        self.audio_tracks, self.subtitle_tracks, self.subtitle_data = [], [], []
        self.process_count_var = tk.StringVar(value="1")

        # Build the widgets
        self.create_widgets()
        
    def create_widgets(self):
        """Build and lay out every widget of the main window.

        Creates the file picker row, the process-count entry, a notebook with
        one listbox per track type (audio / subtitle), the subtitle export
        button, the extract button and the status bar. Widgets that other
        methods need are stored as attributes.
        """
        # Top row: file picker button and the path of the chosen file
        file_frame = ttk.Frame(self.root, padding="10")
        file_frame.pack(fill=tk.X)

        self.file_path_var = tk.StringVar(value="未选择文件")

        select_btn = ttk.Button(file_frame, text="选择MKV文件", command=self.select_mkv_file)
        select_btn.pack(side=tk.LEFT, padx=5)

        file_label = ttk.Label(file_frame, textvariable=self.file_path_var, font=self.font)
        file_label.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)

        # Process count row
        process_frame = ttk.Frame(self.root, padding="10")
        process_frame.pack(fill=tk.X)

        process_label = ttk.Label(process_frame, text="进程数: ", font=self.font)
        process_label.pack(side=tk.LEFT, padx=5)

        process_entry = ttk.Entry(process_frame, textvariable=self.process_count_var, width=5)
        process_entry.pack(side=tk.LEFT, padx=5)

        process_hint = ttk.Label(process_frame, text="(默认1，设置大于1时启用多进程处理)", font=self.font)
        process_hint.pack(side=tk.LEFT, padx=5)

        # Notebook holding one tab per track type
        tab_control = ttk.Notebook(self.root)
        tab_control.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        # Audio track tab
        self.audio_tab = ttk.Frame(tab_control)
        tab_control.add(self.audio_tab, text="音频轨道")

        # exportselection=False: with the default (True) only one listbox in
        # the application can hold a selection at a time, so picking a
        # subtitle would visually clear the audio choice (and vice versa) and
        # fire <<ListboxSelect>> with an empty selection on the other list.
        self.audio_listbox = tk.Listbox(self.audio_tab, font=self.font, width=80, height=10,
                                        exportselection=False)
        self.audio_listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5)

        audio_scrollbar = ttk.Scrollbar(self.audio_tab, orient=tk.VERTICAL, command=self.audio_listbox.yview)
        audio_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        self.audio_listbox.config(yscrollcommand=audio_scrollbar.set)
        self.audio_listbox.bind('<<ListboxSelect>>', self.on_audio_select)

        # Subtitle track tab
        self.subtitle_tab = ttk.Frame(tab_control)
        tab_control.add(self.subtitle_tab, text="字幕轨道")

        # Container for the subtitle list and its scrollbar
        subtitle_content_frame = ttk.Frame(self.subtitle_tab)
        subtitle_content_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        # See the note on the audio listbox for exportselection=False.
        self.subtitle_listbox = tk.Listbox(subtitle_content_frame, font=self.font, width=80, height=10,
                                           exportselection=False)
        self.subtitle_listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=5, pady=5)

        subtitle_scrollbar = ttk.Scrollbar(subtitle_content_frame, orient=tk.VERTICAL, command=self.subtitle_listbox.yview)
        subtitle_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        self.subtitle_listbox.config(yscrollcommand=subtitle_scrollbar.set)
        self.subtitle_listbox.bind('<<ListboxSelect>>', self.on_subtitle_select)

        # Subtitle export button (disabled until a subtitle track is selected)
        subtitle_btn_frame = ttk.Frame(self.subtitle_tab)
        subtitle_btn_frame.pack(fill=tk.X, padx=5, pady=5)

        self.export_subtitle_btn = ttk.Button(subtitle_btn_frame, text="导出字幕", command=self.export_selected_subtitle)
        self.export_subtitle_btn.pack(side=tk.LEFT, padx=5)
        self.export_subtitle_btn.config(state=tk.DISABLED)

        # Bottom row with the extract button (disabled until tracks are loaded)
        bottom_frame = ttk.Frame(self.root, padding="10")
        bottom_frame.pack(fill=tk.X, side=tk.BOTTOM)

        self.extract_btn = ttk.Button(bottom_frame, text="提取音频", command=self.start_extraction)
        self.extract_btn.pack(side=tk.LEFT, padx=5)
        self.extract_btn.config(state=tk.DISABLED)

        # Status bar
        self.status_var = tk.StringVar(value="就绪")
        status_bar = ttk.Label(self.root, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W, font=self.font)
        status_bar.pack(side=tk.BOTTOM, fill=tk.X)
    
    def update_status(self, message):
        """同时更新状态栏和命令行输出"""
        self.status_var.set(message)
        # 打印到命令行窗口
        print(f"[进度] {message}")

    def select_mkv_file(self):
        """Ask the user for an MKV file and load its audio/subtitle tracks.

        Does nothing when the dialog is cancelled. Otherwise all state that
        belongs to the previously loaded file (selected stream indices, track
        lists, parsed subtitle ranges) is discarded before the new file is
        probed, so indices of the old file can never be applied to the new
        one. The extract button is enabled only when the file has at least
        one audio and one subtitle track.
        """
        file_path = filedialog.askopenfilename(
            title="选择MKV文件",
            filetypes=[("MKV文件", "*.mkv"), ("所有文件", "*.*")]
        )

        if not file_path:
            return

        self.mkv_file = file_path
        self.file_path_var.set(file_path)
        self.update_status("正在加载音视频轨道信息...")

        # Forget everything tied to the previous file. Without this, stale
        # stream indices would pass the checks in start_extraction(), and a
        # failed probe would leave the old track lists in place.
        self.selected_audio = ""
        self.selected_subtitle = ""
        self.subtitle_data = []
        self.audio_tracks = []
        self.subtitle_tracks = []
        self.audio_listbox.delete(0, tk.END)
        self.subtitle_listbox.delete(0, tk.END)

        # Probe the file and fill the track lists
        self.load_tracks_info()

        # Extraction needs both an audio and a subtitle track
        if self.audio_tracks and self.subtitle_tracks:
            self.extract_btn.config(state=tk.NORMAL)
        else:
            self.extract_btn.config(state=tk.DISABLED)

        # Export stays disabled until the user picks a subtitle track
        self.export_subtitle_btn.config(state=tk.DISABLED)

        self.update_status("就绪")

    def load_tracks_info(self):
        """Probe ``self.mkv_file`` with ffprobe and list its audio/subtitle streams.

        Fills ``self.audio_tracks`` / ``self.subtitle_tracks`` with
        ``(stream index as str, display text)`` tuples and mirrors the display
        texts into the two listboxes. On any failure an error dialog is shown,
        the status is set to "加载失败", and both track lists are left empty.
        """
        import json

        # Clear first, so a failing probe cannot leave the previous file's
        # tracks behind.
        self.audio_listbox.delete(0, tk.END)
        self.subtitle_listbox.delete(0, tk.END)
        self.audio_tracks = []
        self.subtitle_tracks = []

        try:
            # JSON output is more reliable to parse than ffprobe's text format.
            cmd = [
                "ffprobe",
                "-v", "error",
                "-print_format", "json",
                "-show_streams",
                self.mkv_file
            ]

            # ffprobe emits UTF-8; decoding with the locale codec would garble
            # (or fail on) non-ASCII titles on non-UTF-8 systems.
            result = subprocess.run(cmd, capture_output=True, encoding="utf-8",
                                    errors="replace", check=True)
            data = json.loads(result.stdout)

            for stream in data.get("streams", []):
                track_type = stream.get("codec_type", "unknown")
                if track_type not in ("audio", "subtitle"):
                    continue

                index = str(stream.get("index", "0"))
                codec = stream.get("codec_name", "unknown")
                language = self._detect_stream_language(stream)

                display_text = f"索引: {index}, 类型: {track_type}, 编码: {codec}, 语言: {language}"

                if track_type == 'audio':
                    self.audio_tracks.append((index, display_text))
                    self.audio_listbox.insert(tk.END, display_text)
                else:
                    self.subtitle_tracks.append((index, display_text))
                    self.subtitle_listbox.insert(tk.END, display_text)

        except Exception as e:
            messagebox.showerror("错误", f"加载音视频轨道信息失败: {str(e)}")
            self.update_status("加载失败")

    @staticmethod
    def _detect_stream_language(stream):
        """Return a language label for one ffprobe stream dict.

        Looks at ``stream["language"]``, then ``tags.language``, then guesses
        from ``tags.title``. Chinese codes are normalised to ``"chn"`` and
        English codes to ``"eng"``; anything else is returned unchanged, and
        ``"unknown"`` is returned when nothing is found.
        """
        tags = stream.get("tags") or {}
        language = stream.get("language") or tags.get("language")

        if not language:
            language = "unknown"
            title = (tags.get("title") or "").lower()
            if title:
                # Short codes are compared against whole ASCII words: a plain
                # substring test for "en" also matches e.g. "French" or
                # "Commentary".
                words = set(re.findall(r"[a-z]+", title))
                if "中文" in title or "chinese" in title or words & {"cn", "zh", "zho"}:
                    language = "chn"
                elif "英文" in title or "english" in title or words & {"en", "eng"}:
                    language = "eng"

        language = str(language)

        # Normalise the common ISO 639 spellings
        if language in ("zho", "chi") or language.startswith("zh"):
            return "chn"
        if language.startswith("en"):
            return "eng"
        return language
    
    def on_audio_select(self, event):
        """选择音频轨道"""
        selection = self.audio_listbox.curselection()
        if selection:
            index = selection[0]
            self.selected_audio = self.audio_tracks[index][0]
    
    def on_subtitle_select(self, event):
        """选择字幕轨道"""
        selection = self.subtitle_listbox.curselection()
        if selection:
            index = selection[0]
            self.selected_subtitle = self.subtitle_tracks[index][0]
            # 启用导出字幕按钮
            self.export_subtitle_btn.config(state=tk.NORMAL)
        else:
            # 禁用导出字幕按钮
            self.export_subtitle_btn.config(state=tk.DISABLED)
    
    def export_selected_subtitle(self):
        """Export the selected subtitle stream to a file chosen by the user.

        The suggested file name is ``<mkv base name>_<language>.<extension>``.
        ffmpeg performs the extraction; when it fails, the last lines of its
        error output are shown to the user instead of the bare exit status.
        Cancelling the save dialog is a no-op.
        """
        if not self.selected_subtitle or not self.mkv_file:
            messagebox.showwarning("警告", "请先选择MKV文件和字幕轨道")
            return

        try:
            # Language and codec of the selected track
            subtitle_info = self._get_subtitle_info(self.selected_subtitle)
            if not subtitle_info:
                messagebox.showwarning("警告", "无法获取字幕信息")
                return

            language, codec = subtitle_info

            # Default output name: [base name]_[language].[extension]
            base_name = os.path.splitext(os.path.basename(self.mkv_file))[0]
            output_extension = self._get_subtitle_extension(codec)
            default_output = f"{base_name}_{language}.{output_extension}"

            # Let the user choose where to save
            output_path = filedialog.asksaveasfilename(
                title="保存字幕文件",
                defaultextension=f".{output_extension}",
                filetypes=[
                    (f"{codec.upper()}字幕文件", f"*.{output_extension}"),
                    ("所有文件", "*.*")
                ],
                initialfile=default_output
            )

            if not output_path:
                return

            self.update_status(f"正在导出字幕到 {os.path.basename(output_path)}...")

            # Extract the subtitle stream with ffmpeg
            cmd = [
                "ffmpeg",
                "-i", self.mkv_file,
                "-map", f"0:{self.selected_subtitle}",
                "-y",  # overwrite existing files
                output_path
            ]

            # Decode as UTF-8 with replacement: ffmpeg's metadata dump must not
            # be able to raise a decoding error on non-UTF-8 locales.
            subprocess.run(cmd, capture_output=True, encoding="utf-8",
                           errors="replace", check=True)

            self.update_status(f"字幕导出完成: {os.path.basename(output_path)}")
            messagebox.showinfo("成功", f"字幕导出完成！\n文件: {output_path}")
        except subprocess.CalledProcessError as e:
            # ffmpeg's own diagnostics say why the export failed; the
            # exception text only reports the exit status.
            stderr_tail = "\n".join((e.stderr or "").strip().splitlines()[-3:])
            reason = stderr_tail or str(e)
            print(f"导出字幕失败: {reason}")
            messagebox.showerror("错误", f"导出字幕失败: {reason}")
            self.update_status("字幕导出失败")
        except Exception as e:
            print(f"导出字幕失败: {str(e)}")
            messagebox.showerror("错误", f"导出字幕失败: {str(e)}")
            self.update_status("字幕导出失败")
    
    def _get_subtitle_info(self, subtitle_index):
        """Return ``(language, codec)`` of the subtitle track with the given index.

        The values are parsed back out of the track's display text, whose
        format is
        ``"索引: {index}, 类型: {track_type}, 编码: {codec}, 语言: {language}"``.

        Returns:
            ``(language, codec)``; a field that cannot be found is reported as
            ``"unknown"``. ``None`` when no track has *subtitle_index*.
        """
        for index, display_text in self.subtitle_tracks:
            if index != subtitle_index:
                continue

            try:
                codec_match = re.search(r'编码: (.+?),', display_text)
                language_match = re.search(r'语言: (.+)', display_text)
            except TypeError:
                # Display text is not a string; only this error is expected
                # here, so nothing broader (e.g. KeyboardInterrupt) is caught.
                return ("unknown", "unknown")

            codec = codec_match.group(1) if codec_match else "unknown"
            language = language_match.group(1) if language_match else "unknown"
            return (language, codec)

        return None
    
    def _get_subtitle_extension(self, codec):
        """Map a subtitle codec name to a file extension (without the dot).

        Codec names as reported by ffprobe are matched exactly first. Unknown
        names fall back to a substring search in which the generic key
        ``sub`` is tried last, so that it cannot shadow ``subrip`` or
        ``hdmv_pgs_subtitle``. ``'srt'`` is returned when nothing matches.
        """
        exact_map = {
            'subrip': 'srt',
            'srt': 'srt',
            'mov_text': 'srt',
            'ass': 'ass',
            'ssa': 'ssa',
            'webvtt': 'vtt',
            'hdmv_pgs_subtitle': 'sup',  # PGS subtitles are usually stored as .sup
            'pgs': 'sup',
            'dvb_subtitle': 'sub',
            'dvd_subtitle': 'sub',
            'sub': 'sub',
            'idx': 'idx',
        }

        name = (codec or "").strip().lower()
        if name in exact_map:
            return exact_map[name]

        # Ordered from most to least specific; 'sub' is a substring of many
        # codec names and therefore has to come last.
        fallback_order = (
            ('subrip', 'srt'),
            ('pgs', 'sup'),
            ('webvtt', 'vtt'),
            ('srt', 'srt'),
            ('ass', 'ass'),
            ('ssa', 'ssa'),
            ('idx', 'idx'),
            ('sub', 'sub'),
        )
        for codec_key, ext in fallback_order:
            if codec_key in name:
                return ext

        # Default to srt
        return 'srt'

    def start_extraction(self):
        """开始提取音频"""
        # 检查是否选择了音频和字幕轨道
        if not self.selected_audio:
            messagebox.showwarning("警告", "请先选择音频轨道")
            return
        
        if not self.selected_subtitle:
            messagebox.showwarning("警告", "请先选择字幕轨道")
            return
        
        # 禁用提取按钮
        self.extract_btn.config(state=tk.DISABLED)
        
        # 在后台线程中提取音频
        self.root.after(100, self.extract_audio)
    
    def extract_audio(self):
        """提取音频的主要逻辑"""
        try:
            # 1. 首先提取字幕数据
            self.update_status("正在提取字幕数据...")
            self.extract_subtitle_data()
            
            if not self.subtitle_data:
                self.root.after(0, lambda: messagebox.showwarning("警告", "未提取到字幕数据"))
                self.root.after(0, lambda: self.update_status("未提取到字幕数据"))
                self.root.after(0, lambda: self.extract_btn.config(state=tk.NORMAL))
                return
            
            # 2. 合并相邻的字幕区间（如果间隔小于1秒）
            self.update_status("正在处理字幕区间...")
            merged_segments = self.merge_adjacent_segments()
            
            total_segments = len(merged_segments)
            
            # 3. 获取并验证进程数
            try:
                process_count = int(self.process_count_var.get())
                if process_count < 1:
                    process_count = 1
                # 限制最大进程数为CPU核心数或字幕片段数
                max_processes = min(multiprocessing.cpu_count(), total_segments)
                if process_count > max_processes:
                    process_count = max_processes
            except ValueError:
                process_count = 1
                self.root.after(0, lambda: self.process_count_var.set("1"))
            
            self.update_status(f"准备处理 {total_segments} 个音频片段，使用 {process_count} 个进程")
            
            # 4. 创建临时目录来存储切割的音频片段
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_files = []
                processes = []
                progress_dict = {}
                file_list_lock = None
                
                if process_count == 1:
                    # 单进程模式
                    # 切割音频
                    for i, (start, end) in enumerate(merged_segments):
                        # 更新状态栏
                        progress_msg = f"正在处理片段 {i+1}/{total_segments}: {start} 到 {end}"
                        self.root.after(0, lambda msg=progress_msg: self.update_status(msg))
                        
                        # 切割音频并保存到临时文件
                        temp_file = os.path.join(temp_dir, f"segment_{i}.mp3")
                        self.extract_audio_segment(start, end, temp_file)
                        temp_files.append(temp_file)
                else:
                    # 多进程模式
                    # 创建共享管理器
                    with Manager() as manager:
                        # 创建共享列表来存储所有进程生成的临时文件路径
                        temp_files = manager.list()
                        # 创建共享字典来跟踪进度
                        progress_dict = manager.dict()
                        progress_dict['total'] = total_segments
                        progress_dict['completed'] = 0
                        
                        # 创建共享字典来存储错误信息
                        error_dict = manager.dict()
                        
                        # 创建锁以确保文件列表操作的线程安全
                        file_list_lock = manager.Lock()
                        
                        # 分割任务
                        segment_per_process = (total_segments + process_count - 1) // process_count
                        processes = []
                        
                        # 创建并启动进程
                        for i in range(process_count):
                            start_idx = i * segment_per_process
                            end_idx = min((i + 1) * segment_per_process, total_segments)
                            
                            if start_idx < end_idx:
                                # 分配当前进程要处理的片段
                                process_segments = merged_segments[start_idx:end_idx]
                                
                                print(f"启动进程 {i+1}/{process_count}，处理片段 {start_idx} 到 {end_idx-1}")
                                
                                # 创建进程，传递需要的参数，但不能传递Tkinter相关对象
                                p = multiprocessing.Process(
                                    target=process_segments_worker, 
                                    args=(process_segments, temp_dir, start_idx, 
                                          progress_dict, file_list_lock, temp_files,
                                          self.mkv_file, self.selected_audio, error_dict)
                                )
                                processes.append(p)
                                p.start()
                        
                        # 使用after方法监控进度，避免阻塞主线程
                        self._monitor_progress(processes, progress_dict, total_segments)
                        
                        # 等待所有进程完成，每个进程设置超时时间
                        # 计算每个进程的平均超时时间（每片段最多2分钟）
                        max_timeout = 60 * 2 * ((total_segments + process_count - 1) // process_count)  # 每片段最多2分钟
                        max_timeout = max(max_timeout, 300)  # 至少5分钟
                        
                        # 创建一个字典来跟踪进程的启动时间
                        process_start_times = {p.pid: time.time() for p in processes}
                        
                        # 等待进程完成，但定期检查
                        all_completed = False
                        start_wait_time = time.time()
                        
                        while not all_completed and time.time() - start_wait_time < max_timeout:
                            # 检查是否所有进程都已完成
                            all_completed = all(not p.is_alive() for p in processes)
                            
                            if all_completed:
                                break
                            
                            # 检查是否有进程超时
                            current_time = time.time()
                            for p in processes:
                                if p.is_alive() and current_time - process_start_times[p.pid] > max_timeout:
                                    # 进程超时，尝试终止
                                    print(f"进程 {p.pid} 超时，尝试终止...")
                                    try:
                                        p.terminate()
                                        # 给进程一些时间来终止
                                        p.join(timeout=10)  # 等待10秒
                                        # 如果进程仍然存活，强制终止
                                        if p.is_alive():
                                            print(f"强制终止进程 {p.pid}")
                                            p.kill()
                                    except Exception as e:
                                        print(f"终止进程 {p.pid} 时出错: {str(e)}")
                        
                        # 检查是否有错误
                        if error_dict:
                            error_messages = "\n".join([f"进程 {pid}: {msg}" for pid, msg in error_dict.items()])
                            print(f"多进程处理时出现错误:\n{error_messages}")
                            # 只显示第一个错误给用户
                            first_error = list(error_dict.values())[0]
                            self.root.after(0, lambda err=first_error: \
                                          messagebox.showwarning("警告", f"部分片段处理可能出错:\n{err[:200]}..."))
                        
                        # 将共享列表转换为普通列表
                        temp_files = list(temp_files)
                        
                        # 确保文件按正确的顺序排序
                        def get_segment_index(file_path):
                            # 从文件名中提取索引
                            try:
                                return int(file_path.split('_')[-1].split('.')[0])
                            except:
                                return 0
                        
                        temp_files.sort(key=get_segment_index)
                        
                        # 过滤掉错误或超时的文件
                        valid_temp_files = []
                        for temp_file in temp_files:
                            try:
                                # 检查文件大小是否合理，小于1KB的文件可能是错误文件
                                if os.path.exists(temp_file) and os.path.getsize(temp_file) > 1024:
                                    valid_temp_files.append(temp_file)
                                else:
                                    print(f"跳过无效文件: {temp_file}")
                            except Exception as e:
                                print(f"检查文件 {temp_file} 时出错: {str(e)}")
                        
                        if not valid_temp_files:
                            raise Exception("没有有效的音频片段可供合并")
                        
                        temp_files = valid_temp_files
                
                # 5. 合并所有临时音频文件
                self.root.after(0, lambda: self.update_status("正在合并音频片段..."))
                self.merge_audio_files(temp_files, "output.mp3")
            
            # 6. 完成
            self.root.after(0, lambda t=total_segments: \
                           messagebox.showinfo("成功", f"音频提取完成！输出文件: output.mp3\n共处理 {t} 个片段"))
            self.root.after(0, lambda t=total_segments: \
                           self.update_status(f"音频提取完成，共处理 {t} 个片段"))
            
        except Exception as e:
            print(f"提取音频时出错: {str(e)}")
            traceback.print_exc()
            self.root.after(0, lambda err=str(e): \
                           messagebox.showerror("错误", f"提取音频失败: {err}"))
            self.root.after(0, lambda: self.update_status("提取音频失败"))
        finally:
            self.root.after(0, lambda: self.extract_btn.config(state=tk.NORMAL))
            
    def _monitor_progress(self, processes, progress_dict, total_segments):
        """使用after方法监控进度，避免阻塞主线程"""
        # 检查是否有进程还在运行
        if any(p.is_alive() for p in processes):
            # 更新进度条
            completed = progress_dict.get('completed', 0)
            self.update_status(f"正在处理片段 {completed}/{total_segments}")
            print(f"进度: {completed}/{total_segments} 个片段处理完成")
            # 500毫秒后再次检查
            self.root.after(500, lambda: self._monitor_progress(processes, progress_dict, total_segments))
    
    def extract_subtitle_data(self):
        """提取字幕数据"""
        try:
            # 使用ffmpeg提取字幕文件
            with tempfile.NamedTemporaryFile(suffix='.srt', delete=False) as temp_subtitle:
                temp_subtitle_path = temp_subtitle.name
                
                # 提取字幕
                cmd = [
                    "ffmpeg",
                    "-i", self.mkv_file,
                    "-map", f"0:{self.selected_subtitle}",
                    "-y",
                    temp_subtitle_path
                ]
                
                subprocess.run(cmd, capture_output=True, text=True, check=True)
                
                # 读取字幕文件并解析
                with open(temp_subtitle_path, 'r', encoding='utf-8', errors='replace') as f:
                    subtitle_content = f.read()
                    
                    # 解析SRT格式的字幕
                    self._parse_srt(subtitle_content)
        except Exception as e:
            print(f"提取字幕数据失败: {str(e)}")
            self.subtitle_data = []
    
    def _parse_srt(self, content):
        """Collect the ``(start, end)`` time ranges, in seconds, of an SRT text.

        The result replaces ``self.subtitle_data``. Cues whose start is not
        strictly before their end are dropped.
        """
        # A cue number line followed by "HH:MM:SS,mmm --> HH:MM:SS,mmm"
        cue_re = re.compile(r'\d+\s*\n(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})')

        ranges = []
        for cue in cue_re.finditer(content):
            begin = self._time_to_seconds(cue.group(1))
            finish = self._time_to_seconds(cue.group(2))
            if begin < finish:
                ranges.append((begin, finish))

        self.subtitle_data = ranges
    
    def _time_to_seconds(self, time_str):
        """Convert ``HH:MM:SS,mmm`` (or ``HH:MM:SS.mmm``) to seconds.

        Returns:
            The time as a float, or ``0`` when *time_str* cannot be parsed.
        """
        try:
            # SRT uses a comma as the decimal separator
            hours, minutes, seconds = map(float, time_str.replace(',', '.').split(':'))
        except (ValueError, TypeError, AttributeError):
            # Wrong number of fields, a non-numeric field, or not a string.
            # Only parsing problems are swallowed, not e.g. KeyboardInterrupt.
            return 0
        return hours * 3600 + minutes * 60 + seconds
    
    def merge_adjacent_segments(self, max_gap=1):
        """Merge subtitle ranges that overlap or lie less than *max_gap* seconds apart.

        Args:
            max_gap: ranges whose gap is strictly smaller than this many
                seconds are merged (default: 1).

        Returns:
            A list of ``[start, end]`` lists sorted by start time; empty when
            ``self.subtitle_data`` is empty.
        """
        if not self.subtitle_data:
            return []

        # Process in order of start time
        ordered = sorted(self.subtitle_data, key=lambda segment: segment[0])

        merged = [list(ordered[0])]

        for start, end in ordered[1:]:
            current = merged[-1]

            if start - current[1] < max_gap:
                # Keep the later end: a range nested inside the current one
                # must not shorten it.
                current[1] = max(current[1], end)
            else:
                merged.append([start, end])

        return merged
    
    def extract_audio_segment(self, start_time, end_time, output_file):
        """提取指定时间段的音频"""
        try:
            # 使用ffmpeg提取音频片段
            cmd = [
                "ffmpeg",
                "-i", self.mkv_file,
                "-map", f"0:{self.selected_audio}",
                "-ss", str(start_time),
                "-to", str(end_time),
                "-vn",  # 不包含视频
                "-acodec", "mp3",  # 转换为MP3
                "-y",  # 覆盖现有文件
                output_file
            ]
            
            subprocess.run(cmd, capture_output=True, text=True, check=True)
        except Exception as e:
            print(f"提取音频片段失败: {str(e)}")
            raise
    
    def merge_audio_files(self, input_files, output_file):
        """合并多个音频文件"""
        try:
            # 创建文件列表
            with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as file_list:
                file_list_path = file_list.name
                for file_path in input_files:
                    file_list.write(f"file '{file_path}'\n")
            
            # 使用ffmpeg合并音频文件
            cmd = [
                "ffmpeg",
                "-f", "concat",
                "-safe", "0",
                "-i", file_list_path,
                "-vn",  # 不包含视频
                "-acodec", "copy",  # 直接复制音频编码，不重新编码
                "-y",  # 覆盖现有文件
                output_file
            ]
            
            subprocess.run(cmd, capture_output=True, text=True, check=True)
        except Exception as e:
            print(f"合并音频文件失败: {str(e)}")
            raise
        finally:
            # 清理临时文件列表
            if 'file_list_path' in locals() and os.path.exists(file_list_path):
                try:
                    os.remove(file_list_path)
                except:
                    pass

if __name__ == "__main__":
    # Platforms that start worker processes by re-importing this module
    # (e.g. Windows) require the GUI start-up to live under this guard.
    # freeze_support() additionally makes worker start-up work when the
    # script is frozen into a Windows executable; it has no effect otherwise.
    multiprocessing.freeze_support()
    root = tk.Tk()
    app = MKVAudioExtractor(root)
    root.mainloop()