From 51f33a9b6c4d13f8d12d75c230d8edbf2ac74160 Mon Sep 17 00:00:00 2001
From: Pierre-Edouard Portier
Date: Thu, 23 Nov 2023 22:25:07 +0100
Subject: [PATCH] Notebook

---
 transcribe.ipynb | 211 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)
 create mode 100644 transcribe.ipynb

diff --git a/transcribe.ipynb b/transcribe.ipynb
new file mode 100644
index 0000000..977991b
--- /dev/null
+++ b/transcribe.ipynb
@@ -0,0 +1,211 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc566fe1-26f3-4e4f-9c62-ef5e480e0055",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from openai import OpenAI"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1f11cc6-4daa-4563-8dc0-fcb763b3c44f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the API client; the key is read from the OPENAI_API_KEY environment variable.\n",
+    "client = OpenAI()\n",
+    "client.api_key = os.getenv('OPENAI_API_KEY')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87169d33-5a39-4270-91d7-50353b888a80",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def transcrire_audio(fichier_audio):\n",
+    "    \"\"\"Transcribe a French-language audio file with the Whisper API.\n",
+    "\n",
+    "    Args:\n",
+    "        fichier_audio: Path of the audio file to send for transcription.\n",
+    "\n",
+    "    Returns:\n",
+    "        The `text` attribute of the transcription response.\n",
+    "    \"\"\"\n",
+    "    # Open the file in a context manager so the handle is closed as soon as\n",
+    "    # the request finishes, including when the API call raises.\n",
+    "    with open(fichier_audio, \"rb\") as audio:\n",
+    "        reponse = client.audio.transcriptions.create(\n",
+    "            model=\"whisper-1\",\n",
+    "            file=audio,\n",
+    "            language=\"fr\"\n",
+    "        )\n",
+    "    return reponse.text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89eb827d-abb9-4b46-92be-f26c02e3ba4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def resumer_texte(texte):\n",
+    "    \"\"\"Summarise a conference-talk transcript as Markdown using GPT-4.\n",
+    "\n",
+    "    Args:\n",
+    "        texte: Transcript to summarise; it is appended to the French prompt.\n",
+    "\n",
+    "    Returns:\n",
+    "        The message content of the first completion choice.\n",
+    "    \"\"\"\n",
+    "    completion = client.chat.completions.create(\n",
+    "        model=\"gpt-4\",\n",
+    "        messages=[\n",
+    "            {\"role\": \"system\", \"content\": \"Tu es un assistant personnel compétent, utile et pertinent.\"},\n",
+    "            {\"role\": \"user\", \"content\": \"\"\"\n",
+    "Tu dois résumer un texte qui est une transcription d'un enregistrement audio d'une intervention faite à l'occasion d'une conférence.\n",
+    "\n",
+    "Tu dois d'abord proposer un court paragraphe de résumé du contexte global de l'intervention du conférencier.\n",
+    "Puis, tu dois proposer une synthèse de l'intervention sous forme d'une liste à puces.\n",
+    "La liste à puces doit être organisée sur au maximum 2 niveaux hiérarchiques.\n",
+    "Les entrées de la liste à puces doivent être très synthétiques, exprimées en quelques mots.\n",
+    "Il est très important que le résumé n'omette pas d'informations importantes.\n",
+    "Tu dois bien mettre en relief les données chiffrées mentionnées durant le discours. \n",
+    "\n",
+    "Le résultat doit être au format Markdown. La réponse doit contenir uniquement le code Markdown et rien d'autre.\n",
+    "Voici le modèle de réponse au format Markdown :\n",
+    "\n",
+    "# Un titre pour le résumé\n",
+    "## Contexte\n",
+    "Court paragraphe qui résume le contexte de l'intervention.\n",
+    "## Synthèse\n",
+    "- Premier point important\n",
+    "  - Premier détail sur le premier point important.\n",
+    "  - Second détail sur le premier point important.\n",
+    "  - Etc. Etc.\n",
+    "- Second point important\n",
+    "  - Premier détail sur le second point important.\n",
+    "  - Etc. Etc.\n",
+    "- Etc. Etc.\n",
+    "  - Etc. Etc.\n",
+    "  - Etc. Etc.\n",
+    "\n",
+    "Voici le texte que tu dois résumer :\n",
+    "\n",
+    "    \"\"\" + texte}\n",
+    "        ]\n",
+    "    )\n",
+    "    return completion.choices[0].message.content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "279849d5-56f8-4b59-906e-14992ac10c4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texte_transcrit = transcrire_audio('../transcribe_data/jardin_du_carrousel_2.m4a')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "382c92b9-1c28-4906-beff-1d9405b765ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texte_transcrit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c5df824-f3cc-4a51-b1ad-c52695fa475f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texte_transcrit_2 = transcrire_audio('../transcribe_data/jardin_du_carrousel_1.m4a')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8db68ab5-7351-4872-b5e7-0fe02c9760cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "texte_transcrit_2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3272e3b0-e758-4d0b-b4bd-01633d2aab20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "resume_texte_2 = resumer_texte(texte_transcrit_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0287311f-9c68-4443-bed4-c7fcf2de2344",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(resume_texte_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4a18f17-1bf6-48e1-a2d2-c850c2d8c266",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Only confirm that a key is configured; echoing the key itself would store\n",
+    "# the secret in the notebook's saved output.\n",
+    "client.api_key is not None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af604560-1798-4817-80c4-3be63ea7d8f6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "TRANSCRIBE_ENV",
+   "language": "python",
+   "name": "transcribe_env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}