Add text to speech module

pull/1807/head
Heiner Lohaus 2 months ago
parent 24345bc07b
commit d4a92bb8df

@@ -21,6 +21,7 @@ from .ChatgptFree import ChatgptFree
from .ChatgptNext import ChatgptNext
from .ChatgptX import ChatgptX
from .DeepInfra import DeepInfra
from .DuckDuckGo import DuckDuckGo
from .FlowGpt import FlowGpt
from .FreeChatgpt import FreeChatgpt
from .FreeGpt import FreeGpt

@@ -37,6 +37,7 @@
import llamaTokenizer from "llama-tokenizer-js"
</script>
<script src="https://unpkg.com/gpt-tokenizer/dist/cl100k_base.js" async></script>
<script src="/static/js/text_to_speech/index.js" async></script>
<script>
const user_image = '<img src="/static/img/user.png" alt="your avatar">';
const gpt_image = '<img src="/static/img/gpt.png" alt="your avatar">';

@@ -315,6 +315,7 @@ body {
display: flex;
flex-direction: column;
gap: 10px;
flex-wrap: wrap;
}
.message .content,
@@ -332,6 +333,10 @@ body {
max-width: 400px;
}
.message .content .audio {
display: flex;
}
.message .user i {
position: absolute;
bottom: -6px;
@@ -357,7 +362,8 @@ body {
cursor: pointer;
}
.message .count .fa-clipboard {
.message .count .fa-clipboard,
.message .count .fa-volume-high {
z-index: 1000;
cursor: pointer;
}
@@ -374,6 +380,10 @@ body {
color: var(--accent);
}
.message .count .fa-volume-high.active {
color: var(--accent);
}
.message .assistant:hover .fa-xmark,
.message .user:hover .fa-xmark {
display: block;
@@ -1136,4 +1146,14 @@ a:-webkit-any-link {
.hidden {
display: none;
}
.blink {
animation: blinker 1s step-start infinite;
}
@keyframes blinker {
50% {
opacity: 0;
}
}

@@ -64,6 +64,7 @@ const highlight = (container) => {
);
}
let stopped = false;
const register_message_buttons = async () => {
document.querySelectorAll(".message .fa-xmark").forEach(async (el) => {
if (!("click" in el.dataset)) {
@@ -90,6 +91,72 @@ const register_message_buttons = async () => {
})
}
});
document.querySelectorAll(".message .fa-volume-high").forEach(async (el) => {
if (!("click" in el.dataset)) {
el.dataset.click = "true";
el.addEventListener("click", async () => {
if ("active" in el.classList || window.doSpeech || stopped) {
stopped = true;
return;
}
el.classList.add("blink")
el.classList.add("active")
const message_el = el.parentElement.parentElement.parentElement;
const content_el = el.parentElement.parentElement;
let speechText = await get_message(window.conversation_id, message_el.dataset.index);
speechText = speechText.replaceAll(/\[(.+)\]\(.+\)/gm, "($1)");
speechText = speechText.replaceAll(/\(http.+\)/gm, "");
speechText = speechText.replaceAll("`", "").replaceAll("#", "")
speechText = speechText.replaceAll(
/<!-- generated images start -->[\s\S]+<!-- generated images end -->/gm,
""
)
const lines = speechText.trim().split(/\n|\.|;/);
let ended = true;
window.onSpeechResponse = (url) => {
if (url) {
var sound = document.createElement('audio');
sound.controls = 'controls';
sound.src = url;
sound.type = 'audio/wav';
if (ended) {
sound.autoplay = true;
}
sound.onended = function() {
ended = true;
};
sound.onplay = function() {
ended = false;
};
var container = document.createElement('div');
container.classList.add("audio");
container.appendChild(sound);
content_el.appendChild(container);
}
if (lines.length < 1 || stopped) {
el.classList.remove("blink");
el.classList.remove("active");
return;
}
while (lines.length > 0) {
let line = lines.shift();
var reg = new RegExp('^[0-9]$');
if (line && !reg.test(line)) {
return handleGenerateSpeech(line);
}
}
if (!line) {
el.classList.remove("blink")
el.classList.remove("active")
}
}
let line = lines.shift();
return handleGenerateSpeech(line);
});
}
});
}
const delete_conversations = async () => {
@@ -145,7 +212,11 @@ const handle_ask = async () => {
: ''
}
</div>
<div class="count">${count_words_and_tokens(message, get_selected_model())} <i class="fa-regular fa-clipboard"></i></div>
<div class="count">
${count_words_and_tokens(message, get_selected_model())}
<i class="fa-solid fa-volume-high"></i>
<i class="fa-regular fa-clipboard"></i>
</div>
</div>
</div>
`;
@@ -479,7 +550,11 @@ const load_conversation = async (conversation_id, scroll=true) => {
<div class="content">
${provider}
<div class="content_inner">${markdown_render(item.content)}</div>
<div class="count">${count_words_and_tokens(item.content, next_provider?.model)} <i class="fa-regular fa-clipboard"></i></div>
<div class="count">
${count_words_and_tokens(item.content, next_provider?.model)}
<i class="fa-solid fa-volume-high"></i>
<i class="fa-regular fa-clipboard"></i>
</div>
</div>
</div>
`;
@@ -1149,10 +1224,12 @@ if (SpeechRecognition) {
}
let startValue;
let lastValue;
let timeoutHandle;
recognition.onstart = function() {
microLabel.classList.add("recognition");
startValue = messageInput.value;
lastValue = "";
timeoutHandle = window.setTimeout(may_stop, 8000);
};
recognition.onend = function() {
@@ -1163,25 +1240,22 @@
return;
}
window.clearTimeout(timeoutHandle);
let notFinal = "";
event.results.forEach((result) => {
const newText = result[0].transcript;
if (newText) {
let newText;
Array.from(event.results).forEach((result) => {
newText = result[0].transcript;
if (newText && newText != lastValue) {
messageInput.value = `${startValue ? startValue+"\n" : ""}${newText.trim()}`;
if (result.isFinal) {
messageInput.value = `${startValue ? startValue+"\n" : ""}${newText.trim()}`;
lastValue = newText;
startValue = messageInput.value;
notFinal = "";
messageInput.focus();
} else {
notFinal += newText;
messageInput.value = `${startValue ? startValue+"\n" : ""}${notFinal.trim()}`;
}
messageInput.style.height = messageInput.scrollHeight + "px";
messageInput.scrollTop = messageInput.scrollHeight;
}
});
window.clearTimeout(timeoutHandle);
timeoutHandle = window.setTimeout(may_stop, notFinal ? 5000 : 8000);
timeoutHandle = window.setTimeout(may_stop, newText ? 8000 : 5000);
};
microLabel.addEventListener("click", () => {
@@ -1189,8 +1263,8 @@ if (SpeechRecognition) {
window.clearTimeout(timeoutHandle);
recognition.stop();
} else {
const lang = document.getElementById("recognition-language")?.value || navigator.language;
recognition.lang = lang;
const lang = document.getElementById("recognition-language")?.value;
recognition.lang = lang || navigator.language;
recognition.start();
}
});

@@ -0,0 +1 @@
(()=>{var e,t,r,n,a={896:(e,t,r)=>{"use strict";var n=r(900);function a(e,t,r){for(let n=0;n<r.length;++n)e.setUint8(t+n,r.charCodeAt(n))}n._K2.allowLocalModels=!1;class s{static BASE_URL="https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/";static model_id="Xenova/speecht5_tts";static vocoder_id="Xenova/speecht5_hifigan";static tokenizer_instance=null;static model_instance=null;static vocoder_instance=null;static async getInstance(e=null){return null===this.tokenizer_instance&&(this.tokenizer=n.v6I.from_pretrained(this.model_id,{progress_callback:e})),null===this.model_instance&&(this.model_instance=n.fqH.from_pretrained(this.model_id,{quantized:!1,progress_callback:e})),null===this.vocoder_instance&&(this.vocoder_instance=n.oJL.from_pretrained(this.vocoder_id,{quantized:!1,progress_callback:e})),new Promise((async(e,t)=>{const r=await Promise.all([this.tokenizer,this.model_instance,this.vocoder_instance]);self.postMessage({status:"ready"}),e(r)}))}static async getSpeakerEmbeddings(e){const t=`${this.BASE_URL}${e}.bin`;return new n.qYS("float32",new Float32Array(await(await fetch(t)).arrayBuffer()),[1,512])}}const o=new Map;self.addEventListener("message",(async e=>{const[t,r,n]=await s.getInstance((e=>{self.postMessage(e)})),{input_ids:i}=t(e.data.text);let c,l=o.get(e.data.speaker_id);void 0===l&&(l=await s.getSpeakerEmbeddings(e.data.speaker_id),o.set(e.data.speaker_id,l));try{c=await r.generate_speech(i,l,{vocoder:n})}catch(e){throw self.postMessage({status:"error",exception:e}),e}const{waveform:d}=c,p=function(e){let t=44;const r=new ArrayBuffer(t+4*e.length),n=new DataView(r);a(n,0,"RIFF"),n.setUint32(4,36+4*e.length,!0),a(n,8,"WAVE"),a(n,12,"fmt "),n.setUint32(16,16,!0),n.setUint16(20,3,!0),n.setUint16(22,1,!0),n.setUint32(24,16e3,!0),n.setUint32(28,64e3,!0),n.setUint16(32,4,!0),n.setUint16(34,32,!0),a(n,36,"data"),n.setUint32(40,4*e.length,!0);for(let r=0;r<e.length;++r,t+=4)n.setFloat32(t,e[r],!0);return r}(d.data);self.postMessage({status:"complete",output:new Blob([p],{type:"audio/wav"})})}))},52:()=>{},143:()=>{},603:()=>{},806:()=>{},853:()=>{},9:()=>{},837:()=>{},499:()=>{}},s={};function o(e){var t=s[e];if(void 0!==t)return t.exports;var r=s[e]={exports:{}};return a[e](r,r.exports,o),r.exports}o.m=a,o.x=()=>{var e=o.O(void 0,[900],(()=>o(896)));return o.O(e)},e=[],o.O=(t,r,n,a)=>{if(!r){var s=1/0;for(d=0;d<e.length;d++){for(var[r,n,a]=e[d],i=!0,c=0;c<r.length;c++)(!1&a||s>=a)&&Object.keys(o.O).every((e=>o.O[e](r[c])))?r.splice(c--,1):(i=!1,a<s&&(s=a));if(i){e.splice(d--,1);var l=n();void 0!==l&&(t=l)}}return t}a=a||0;for(var d=e.length;d>0&&e[d-1][2]>a;d--)e[d]=e[d-1];e[d]=[r,n,a]},r=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,o.t=function(e,n){if(1&n&&(e=this(e)),8&n)return e;if("object"==typeof e&&e){if(4&n&&e.__esModule)return e;if(16&n&&"function"==typeof e.then)return e}var a=Object.create(null);o.r(a);var s={};t=t||[null,r({}),r([]),r(r)];for(var i=2&n&&e;"object"==typeof i&&!~t.indexOf(i);i=r(i))Object.getOwnPropertyNames(i).forEach((t=>s[t]=()=>e[t]));return s.default=()=>e,o.d(a,s),a},o.d=(e,t)=>{for(var r in t)o.o(t,r)&&!o.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},o.f={},o.e=e=>Promise.all(Object.keys(o.f).reduce(((t,r)=>(o.f[r](e,t),t)),[])),o.u=e=>e+".index.js",o.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),o.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),o.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;o.g.importScripts&&(e=o.g.location+"");var t=o.g.document;if(!e&&t&&(t.currentScript&&(e=t.currentScript.src),!e)){var r=t.getElementsByTagName("script");if(r.length)for(var n=r.length-1;n>-1&&(!e||!/^http(s?):/.test(e));)e=r[n--].src}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),o.p=e})(),(()=>{var e={630:1};o.f.i=(t,r)=>{e[t]||importScripts(o.p+o.u(t))};var t=self.webpackChunk=self.webpackChunk||[],r=t.push.bind(t);t.push=t=>{var[n,a,s]=t;for(var i in a)o.o(a,i)&&(o.m[i]=a[i]);for(s&&s(o);n.length;)e[n.pop()]=1;r(t)}})(),n=o.x,o.x=()=>o.e(900).then(n),o.x()})();
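For reference, the minified worker bundle above decodes to roughly the following logic. This is a de-minified sketch, assuming the @xenova/transformers exports (env, AutoTokenizer, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor) that the mangled symbols n._K2, n.v6I, n.fqH, n.oJL and n.qYS appear to correspond to; it is not the literal source.

// De-minified sketch of the speech worker (assumes an ES-module worker;
// the shipped bundle instead pulls in the transformers chunk via importScripts).
import { env, AutoTokenizer, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from "@xenova/transformers";

env.allowLocalModels = false;

class TextToSpeechPipeline {
    static BASE_URL = "https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/";
    static model_id = "Xenova/speecht5_tts";
    static vocoder_id = "Xenova/speecht5_hifigan";
    static tokenizer = null;
    static model = null;
    static vocoder = null;

    // Lazily load tokenizer, acoustic model and vocoder, reporting progress to the page.
    static async getInstance(progress_callback = null) {
        this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, { progress_callback });
        this.model ??= SpeechT5ForTextToSpeech.from_pretrained(this.model_id, { quantized: false, progress_callback });
        this.vocoder ??= SpeechT5HifiGan.from_pretrained(this.vocoder_id, { quantized: false, progress_callback });
        const instances = await Promise.all([this.tokenizer, this.model, this.vocoder]);
        self.postMessage({ status: "ready" });
        return instances;
    }

    // Speaker x-vectors are fetched as raw float32 data and wrapped in a [1, 512] tensor.
    static async getSpeakerEmbeddings(speaker_id) {
        const url = `${this.BASE_URL}${speaker_id}.bin`;
        return new Tensor("float32", new Float32Array(await (await fetch(url)).arrayBuffer()), [1, 512]);
    }
}

// Writes a 44-byte RIFF/WAVE header for mono 16 kHz 32-bit float PCM, then the samples.
function encodeWAV(samples) {
    const buffer = new ArrayBuffer(44 + samples.length * 4);
    const view = new DataView(buffer);
    const writeString = (offset, str) => {
        for (let i = 0; i < str.length; ++i) view.setUint8(offset + i, str.charCodeAt(i));
    };
    writeString(0, "RIFF");
    view.setUint32(4, 36 + samples.length * 4, true);
    writeString(8, "WAVE");
    writeString(12, "fmt ");
    view.setUint32(16, 16, true);     // fmt chunk size
    view.setUint16(20, 3, true);      // audio format 3 = IEEE float
    view.setUint16(22, 1, true);      // mono
    view.setUint32(24, 16000, true);  // sample rate
    view.setUint32(28, 64000, true);  // byte rate = 16000 * 4
    view.setUint16(32, 4, true);      // block align
    view.setUint16(34, 32, true);     // bits per sample
    writeString(36, "data");
    view.setUint32(40, samples.length * 4, true);
    let offset = 44;
    for (let i = 0; i < samples.length; ++i, offset += 4) view.setFloat32(offset, samples[i], true);
    return buffer;
}

const speakerEmbeddingsCache = new Map();

self.addEventListener("message", async (event) => {
    const [tokenizer, model, vocoder] = await TextToSpeechPipeline.getInstance((x) => self.postMessage(x));
    const { input_ids } = tokenizer(event.data.text);

    // Cache speaker embeddings per speaker_id so repeated requests skip the network fetch.
    let embeddings = speakerEmbeddingsCache.get(event.data.speaker_id);
    if (embeddings === undefined) {
        embeddings = await TextToSpeechPipeline.getSpeakerEmbeddings(event.data.speaker_id);
        speakerEmbeddingsCache.set(event.data.speaker_id, embeddings);
    }

    let result;
    try {
        result = await model.generate_speech(input_ids, embeddings, { vocoder });
    } catch (exception) {
        self.postMessage({ status: "error", exception });
        throw exception;
    }

    // Package the raw waveform as a WAV Blob and hand it back to the page.
    const wav = encodeWAV(result.waveform.data);
    self.postMessage({ status: "complete", output: new Blob([wav], { type: "audio/wav" }) });
});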

File diff suppressed because one or more lines are too long

@@ -0,0 +1 @@
(()=>{"use strict";var e={m:{},u:e=>e+".index.js"};e.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),e.o=(e,c)=>Object.prototype.hasOwnProperty.call(e,c),(()=>{var c;e.g.importScripts&&(c=e.g.location+"");var t=e.g.document;if(!c&&t&&(t.currentScript&&(c=t.currentScript.src),!c)){var a=t.getElementsByTagName("script");if(a.length)for(var r=a.length-1;r>-1&&(!c||!/^http(s?):/.test(c));)c=a[r--].src}if(!c)throw new Error("Automatic publicPath is not supported in this browser");c=c.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),e.p=c})(),e.b=document.baseURI||self.location.href;const c={};c.current||(c.current=new Worker(new URL(e.p+e.u(630),e.b),{type:void 0})),window.doSpeech=!1,c.current.addEventListener("message",(e=>{switch(e.data.status){case"error":window.onSpeechResponse(null),window.doSpeech=!1;break;case"complete":const c=URL.createObjectURL(e.data.output);window.onSpeechResponse(c),window.doSpeech=!1}})),window.SPEAKERS={"US female 1":"cmu_us_slt_arctic-wav-arctic_a0001","US female 2":"cmu_us_clb_arctic-wav-arctic_a0001","US male 1":"cmu_us_bdl_arctic-wav-arctic_a0003","US male 2":"cmu_us_rms_arctic-wav-arctic_a0003","Canadian male":"cmu_us_jmk_arctic-wav-arctic_a0002","Scottish male":"cmu_us_awb_arctic-wav-arctic_b0002","Indian male":"cmu_us_ksp_arctic-wav-arctic_a0007"},window.handleGenerateSpeech=(e,t="cmu_us_slt_arctic-wav-arctic_a0001")=>{window.doSpeech=!0,c.current.postMessage({text:e,speaker_id:t})},window.onSpeechResponse=e=>console.log(e)})();