This commit is contained in:
Pierre-Edouard Portier 2023-11-23 22:25:07 +01:00
parent ceb8b6d419
commit 51f33a9b6c
1 changed files with 189 additions and 0 deletions

189
transcribe.ipynb Normal file
View File

@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "bc566fe1-26f3-4e4f-9c62-ef5e480e0055",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from openai import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1f11cc6-4daa-4563-8dc0-fcb763b3c44f",
"metadata": {},
"outputs": [],
"source": [
"# Hand the key to the client at construction instead of patching the\n",
"# attribute afterwards, so the client is never built without it.\n",
"client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87169d33-5a39-4270-91d7-50353b888a80",
"metadata": {},
"outputs": [],
"source": [
"def transcrire_audio(fichier_audio):\n",
"    \"\"\"Transcribe a French audio recording with the `whisper-1` model.\n",
"\n",
"    Args:\n",
"        fichier_audio: Path of the audio file to upload.\n",
"\n",
"    Returns:\n",
"        The `text` attribute of the transcription response.\n",
"    \"\"\"\n",
"    # Context manager: the handle is closed even when the request raises,\n",
"    # instead of leaking one open file per call.\n",
"    with open(fichier_audio, \"rb\") as flux_audio:\n",
"        reponse = client.audio.transcriptions.create(\n",
"            model=\"whisper-1\",\n",
"            file=flux_audio,\n",
"            language=\"fr\",\n",
"        )\n",
"    return reponse.text"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89eb827d-abb9-4b46-92be-f26c02e3ba4c",
"metadata": {},
"outputs": [],
"source": [
"def resumer_texte(texte):\n",
"    \"\"\"Summarise a conference-talk transcript as Markdown with `gpt-4`.\n",
"\n",
"    The French instructions ask for a short context paragraph followed by\n",
"    a bullet list of at most two levels, and show the expected layout.\n",
"\n",
"    Args:\n",
"        texte: Transcript to summarise; appended verbatim after the\n",
"            instructions.\n",
"\n",
"    Returns:\n",
"        The message content of the first completion choice.\n",
"    \"\"\"\n",
"    # Kept flush-left on purpose: any indentation here would become part of\n",
"    # the prompt. Nested bullets use four spaces so the sample really shows\n",
"    # two list levels in Markdown.\n",
"    consignes = \"\"\"\n",
"Tu dois résumer un texte qui est une transcription d'un enregistrement audio d'une intervention faite à l'occasion d'une conférence.\n",
"\n",
"Tu dois d'abord proposer un court paragraphe de résumé du contexte global de l'intervention du conférencier.\n",
"Puis, tu dois proposer une synthèse de l'intervention sous forme d'une liste à puces.\n",
"La liste à puces doit être organisée sur au maximum 2 niveaux hiérarchiques.\n",
"Les entrées de la liste à puces doivent être très synthétiques, exprimées en quelques mots.\n",
"Il est très important que le résumé n'omette pas d'informations importantes.\n",
"Tu dois bien mettre en relief les données chiffrées mentionnées durant le discours. \n",
"\n",
"Le résultat doit être au format Markdown. La réponse doit contenir uniquement le code Markdown et rien d'autre.\n",
"Voici le modèle de réponse au format Markdown :\n",
"\n",
"# Un titre pour le résumé\n",
"## Contexte\n",
"Court paragraphe qui résume le contexte de l'intervention.\n",
"## Synthèse\n",
"- Premier point important\n",
"    - Premier détail sur le premier point important.\n",
"    - Second détail sur le premier point important.\n",
"    - Etc. Etc.\n",
"- Second point important\n",
"    - Premier détail sur le second point important.\n",
"    - Etc. Etc.\n",
"- Etc. Etc.\n",
"    - Etc. Etc.\n",
"    - Etc. Etc.\n",
"\n",
"Voici le texte que tu dois résumer :\n",
"\n",
"\"\"\"\n",
"    completion = client.chat.completions.create(\n",
"        model=\"gpt-4\",\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": \"Tu es un assistant personnel compétent, utile et pertinent.\"},\n",
"            {\"role\": \"user\", \"content\": consignes + texte},\n",
"        ],\n",
"    )\n",
"    return completion.choices[0].message.content"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "279849d5-56f8-4b59-906e-14992ac10c4a",
"metadata": {},
"outputs": [],
"source": [
"texte_transcrit = transcrire_audio('../transcribe_data/jardin_du_carrousel_2.m4a')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "382c92b9-1c28-4906-beff-1d9405b765ee",
"metadata": {},
"outputs": [],
"source": [
"texte_transcrit"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c5df824-f3cc-4a51-b1ad-c52695fa475f",
"metadata": {},
"outputs": [],
"source": [
"texte_transcrit_2 = transcrire_audio('../transcribe_data/jardin_du_carrousel_1.m4a')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8db68ab5-7351-4872-b5e7-0fe02c9760cb",
"metadata": {},
"outputs": [],
"source": [
"texte_transcrit_2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3272e3b0-e758-4d0b-b4bd-01633d2aab20",
"metadata": {},
"outputs": [],
"source": [
"resume_texte_2 = resumer_texte(texte_transcrit_2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0287311f-9c68-4443-bed4-c7fcf2de2344",
"metadata": {},
"outputs": [],
"source": [
"print(resume_texte_2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4a18f17-1bf6-48e1-a2d2-c850c2d8c266",
"metadata": {},
"outputs": [],
"source": [
"# Report only whether a key is set: echoing the attribute would store the\n",
"# secret in the notebook's saved output.\n",
"print('OPENAI_API_KEY configured:', bool(client.api_key))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af604560-1798-4817-80c4-3be63ea7d8f6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "TRANSCRIBE_ENV",
"language": "python",
"name": "transcribe_env"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}