# -*- coding: utf-8 -*-
"""

"""

from time import sleep
import time
import requests
import pyaudio
import threading
import struct


# URL that speech2text() posts audio to; empty until set_asr_base_url() is called.
ASR_BASE_URL = ""
# Value sent as the 'model' form field by speech2text(); set via set_asr_model_name().
ASR_MODEL_NAME = ""


def set_asr_base_url(asr_base_url: str) -> None:
    """Set the module-wide ASR endpoint URL.

    Args:
        asr_base_url: Value stored in the module-level ``ASR_BASE_URL``.
    """
    global ASR_BASE_URL
    ASR_BASE_URL = asr_base_url


def set_asr_model_name(asr_model_name: str) -> None:
    """Set the module-wide ASR model name.

    Args:
        asr_model_name: Value stored in the module-level ``ASR_MODEL_NAME``.
    """
    global ASR_MODEL_NAME
    ASR_MODEL_NAME = asr_model_name


def speech2text(speech_file=None, speech_bytes=None, timeout=None):
    """Transcribe audio by posting it to the configured ASR HTTP endpoint.

    The request goes to the module-level ``ASR_BASE_URL`` as a multipart
    form carrying ``ASR_MODEL_NAME`` in the ``model`` field and the audio
    in the ``file`` field.

    Args:
        speech_file: Path of an audio file to read. When given, its content
            takes precedence over ``speech_bytes``.
        speech_bytes: Raw audio content, used when ``speech_file`` is None.
        timeout: Forwarded to ``requests.post``. The default ``None`` keeps
            the earlier behaviour of waiting without a time limit.

    Returns:
        The ``"text"`` entry of the JSON response body.

    Raises:
        RuntimeError: If neither input is supplied, or if the server
            answers with a status code other than 200.
    """
    if speech_file is not None:
        # Context manager so the file handle is closed deterministically.
        with open(speech_file, "rb") as reader:
            speech_bytes = reader.read()
    if speech_bytes is None:
        raise RuntimeError("Error Input: speech_file and speech_bytes cannot be empty at the same time.")

    res = requests.post(
        url=ASR_BASE_URL,
        files={
            'model': (None, ASR_MODEL_NAME),
            'file': speech_bytes,
        },
        timeout=timeout,
    )

    # Explicit check instead of `assert`, which is removed under `python -O`.
    if res.status_code != 200:
        raise RuntimeError(
            f"ASR request failed with status code {res.status_code}: {res.text}"
        )
    return res.json()["text"]


class Listener:
    """Own a PyAudio instance and one input stream opened from it.

    The object can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.

    Attributes such as ``recording_thread``, ``thread``, the two locks and
    ``_is_recording`` are only initialised here; no method in this class
    reads them (presumably used by code outside this class -- confirm).
    """

    def __init__(self,
                 format=pyaudio.paInt16,
                 channels=1,
                 rate=16000,
                 input_device_index=None,
                 frames_per_buffer: int = pyaudio.paFramesPerBufferUnspecified):
        """Create the PyAudio instance and open the input stream.

        Args:
            format: PyAudio sample format constant.
            channels: Number of channels.
            rate: Sample rate passed to the stream and written to WAV headers.
            input_device_index: Device index passed to ``PyAudio.open``.
            frames_per_buffer: Buffer size passed to ``PyAudio.open``.
        """
        # Bytes per sample derived from the chosen format (2 for the default
        # paInt16) so WAV headers stay correct for other sample formats.
        # Computed first so an invalid format fails before PortAudio is set up.
        self.sampwidth = pyaudio.get_sample_size(format)
        self.p = pyaudio.PyAudio()
        self.format = format
        self.channels = channels
        self.rate = rate
        self.input_device_index = input_device_index
        self.frames_per_buffer = frames_per_buffer
        self.recording_thread = None
        self._start_lock = threading.Lock()
        self._recording_lock = threading.Lock()
        self._is_recording = False
        self.thread = None
        # close() releases this if something has assigned it; it must exist
        # so close() does not fail with AttributeError.
        self.speaker = None
        try:
            self.stream = self._open_stream()
        except Exception:
            # Do not leak the PyAudio instance when the stream cannot open.
            self.p.terminate()
            raise

    def get_wav_header(self, datalength):
        """Return the 44-byte RIFF/WAVE header for ``datalength`` data bytes.

        The header always declares format tag 1 (PCM) and takes channels,
        sample rate and sample width from this instance.

        Args:
            datalength: Size in bytes of the audio data that follows.

        Returns:
            The header as ``bytes``.
        """
        bytes_per_frame = self.channels * self.sampwidth
        return struct.pack(
            '<4sL4s4sLHHLLHH4sL',
            b'RIFF',
            36 + datalength,              # RIFF chunk size: header rest + data
            b'WAVE',
            b'fmt ',
            16,                           # size of the fmt chunk
            0x0001,                       # format tag: PCM
            self.channels,
            self.rate,
            self.rate * bytes_per_frame,  # byte rate
            bytes_per_frame,              # block align
            self.sampwidth * 8,           # bits per sample
            b'data',
            datalength,
        )

    def __enter__(self):
        """Return the listener itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release all audio resources; exceptions are not suppressed."""
        self.close()

    def _open_stream(self):
        """Open an input stream on ``self.p`` with the stored settings."""
        return self.p.open(format=self.format,
                           channels=self.channels,
                           rate=self.rate,
                           input=True,
                           input_device_index=self.input_device_index,
                           frames_per_buffer=self.frames_per_buffer)

    def close(self):
        """Stop and close the stream, terminate PyAudio, close the speaker.

        Safe to call more than once: each resource is detached from the
        instance before being released, and a failure in one step does not
        prevent the remaining resources from being released.
        """
        stream, self.stream = getattr(self, "stream", None), None
        audio, self.p = getattr(self, "p", None), None
        speaker, self.speaker = getattr(self, "speaker", None), None
        try:
            if stream:
                try:
                    stream.stop_stream()
                finally:
                    stream.close()
        finally:
            try:
                if audio:
                    audio.terminate()
            finally:
                if speaker:
                    speaker.close()


if __name__ == "__main__":
    # Manual timing run: send the same WAV file to the ASR service ten times
    # and report the total wall-clock time.
    print("Begin")

    file_path = "path to wav file"
    with open(file_path, "rb") as wav_file:
        speech_bytes = wav_file.read()

    start = time.time()
    for _ in range(10):
        transcript = speech2text(speech_bytes=speech_bytes)
        print(transcript)
    end = time.time()
    print(f"cost: {end-start}s")

    print("End")

