diff --git a/de_bello_gallico.ipynb b/de_bello_gallico.ipynb new file mode 100644 index 0000000..bd517bf --- /dev/null +++ b/de_bello_gallico.ipynb @@ -0,0 +1,1224 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "2b2f0cd7-2adc-451d-91c8-afa496cacb3f", + "metadata": {}, + "outputs": [], + "source": [ + "## Requires Python 3.7, 3.8, 3.9, 3.10 on a POSIX-compliant OS\n", + "\n", + "## Install the Classical Language Toolkit\n", + "# !pip install cltk" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "d3b6884a-0ef5-430a-b872-81fa97b98c56", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from cltk import NLP\n", + "from cltk.data.fetch import FetchCorpus\n", + "from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer;\n", + "from sklearn.decomposition import PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f64d21f4-b285-4ba2-9b3f-8c8a7f29b45c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['lat_text_perseus',\n", + " 'lat_treebank_perseus',\n", + " 'lat_text_latin_library',\n", + " 'phi5',\n", + " 'phi7',\n", + " 'latin_proper_names_cltk',\n", + " 'lat_models_cltk',\n", + " 'latin_pos_lemmata_cltk',\n", + " 'latin_treebank_index_thomisticus',\n", + " 'latin_lexica_perseus',\n", + " 'latin_training_set_sentence_cltk',\n", + " 'latin_word2vec_cltk',\n", + " 'latin_text_antique_digiliblt',\n", + " 'latin_text_corpus_grammaticorum_latinorum',\n", + " 'latin_text_poeti_ditalia',\n", + " 'lat_text_tesserae',\n", + " 'cltk_lat_lewis_elementary_lexicon']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get texts\n", + "\n", + "corpus_downloader = FetchCorpus(language=\"lat\")\n", + "corpus_downloader.list_corpora" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9bcb9cbb-ca49-4a21-93db-d14c96b33c1d", + 
"metadata": {}, + "outputs": [], + "source": [ + "# corpus_downloader.import_corpus(\"lat_text_tesserae\") # downloads plain text files" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cab0d102-f606-46c0-81c4-ec942c989430", + "metadata": {}, + "outputs": [], + "source": [ + "# variable assignment\n", + "# read the eight books of De Bello Gallico from the Tesserae corpus,\n", + "# located relative to the current user's home directory so the notebook\n", + "# is not tied to one machine\n", + "\n", + "from pathlib import Path\n", + "\n", + "texts_dir = Path.home() / \"cltk_data/lat/text/lat_text_tesserae/texts\"\n", + "\n", + "def read_book(number):\n", + "    \"\"\"Return the raw text of book `number` (1-8).\"\"\"\n", + "    book_path = texts_dir / f\"caesar.de_bello_gallico.part.{number}.tess\"\n", + "    return book_path.read_text(encoding=\"utf-8\")\n", + "\n", + "ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8 = (read_book(n) for n in range(1, 9))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "46cc146d-77ae-4242-869c-45910279a544", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "‎𐤀 CLTK version '1.1.6'.\n", + "Pipeline for language 'Latin' (ISO: 'lat'): `LatinNormalizeProcess`, `LatinStanzaProcess`, `LatinEmbeddingsProcess`, `StopsProcess`, `LatinLexiconProcess`.\n" + ] + } + ], + "source": [ + "# instantiate the pipeline\n", + "\n", + "cltk_nlp = NLP(language=\"lat\")" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "id": "576affdd-cad6-485d-b0df-b97bd3b7611c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[, , , ]\n" + ] + } + ], + "source": [ + "# pre-processing\n", + "# remove ``LatinLexiconProcess`` because it is slow (adds ~9 mins total)\n", + "\n", + "cltk_nlp.pipeline.processes.pop(-1)\n", + "print(cltk_nlp.pipeline.processes)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3e94698d-be62-4086-92d4-0b150a94fe72", + "metadata": {}, + "outputs": [], + "source": [ + "# process each text (one analysis per book; text5 must come from ch5)\n", + "\n", + "text1 = cltk_nlp.analyze(text = ch1)\n", + "text2 = cltk_nlp.analyze(text = ch2)\n", + "text3 = cltk_nlp.analyze(text = ch3)\n", + "text4 = cltk_nlp.analyze(text = ch4)\n", + "text5 = cltk_nlp.analyze(text = ch5)\n", + "text6 = cltk_nlp.analyze(text = ch6)\n", + "text7 = cltk_nlp.analyze(text = ch7)\n", + "text8 = cltk_nlp.analyze(text = ch8)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d8e781dc-9ab4-440e-90d9-15fb0ef4c93b", + "metadata": {}, + "outputs": [], + "source": [ + "# make a list of texts\n", + "\n", + "corpus = [text1.raw, text2.raw, text3.raw, text4.raw,\n", + " text5.raw, text6.raw, text7.raw, text8.raw]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b553b410-a903-426a-a369-9536e77b3348", + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate the vectorizer\n", + "\n", + "vectorizer = TfidfVectorizer(min_df = 1.0, max_features = 400, use_idf = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "e2af9d0e-6b18-43c2-8875-5a8f79f2d4df", + "metadata": {}, + "outputs": [], + "source": [ + "# convert the sparse result to a plain ndarray; .todense() would give a\n", + "# numpy.matrix, which scikit-learn >= 1.2 rejects with a TypeError\n", + "text_frequencies = vectorizer.fit_transform(corpus).toarray()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7f1e86e8-f5a3-4d19-836e-affcd622004e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8, 295)" + ] + }, + "execution_count": 15, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text_frequencies.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "160b9960-a03f-4240-a20b-5a61550b6c40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.matrix" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(text_frequencies)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "def520f2-7563-418d-8c99-05558f08a25c", + "metadata": {}, + "outputs": [], + "source": [ + "# make the matrix a dataframe\n", + "\n", + "df = pd.DataFrame(text_frequencies)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "55b04e53-8374-4e86-b761-f5ae06df3341", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
0123456789...285286287288289290291292293294
00.0130700.0130700.0163380.0130700.0147040.0081690.0098030.0098030.0163380.008169...0.0049010.0114370.0032680.1209020.0408450.0032680.0016340.0049010.0065350.003268
10.0180430.0180430.0240580.0180430.0180430.0150360.0150360.0090220.0240580.009022...0.0060140.0210500.0090220.1172810.0240580.0030070.0030070.0090220.0120290.006014
20.0214410.0250150.0214410.0357350.0536030.0393090.0285880.0214410.0142940.010721...0.0178680.0071470.0071470.0929120.0214410.0035740.0035740.0107210.0071470.007147
30.0183970.0157690.0157690.0157690.0131410.0131410.0210250.0262820.0105130.010513...0.0078840.0078840.0052560.0841010.0262820.0052560.0052560.0078840.0078840.005256
40.0130700.0130700.0163380.0130700.0147040.0081690.0098030.0098030.0163380.008169...0.0049010.0114370.0032680.1209020.0408450.0032680.0016340.0049010.0065350.003268
50.0178450.0133840.0223060.0267680.0133840.0044610.0111530.0111530.0066920.008923...0.0066920.0111530.0089230.1115310.0066920.0066920.0022310.0066920.0022310.002231
60.0131130.0120200.0087420.0043710.0120200.0076490.0043710.0087420.0043710.006556...0.0054640.0065560.0021850.1059930.0043710.0032780.0010930.0054640.0021850.002185
70.0108650.0065190.0152110.0086920.0108650.0130380.0086920.0043460.0086920.017384...0.0195570.0152110.0021730.0890910.0021730.0021730.0021730.0043460.0021730.002173
\n", + "

8 rows × 295 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 0.013070 0.013070 0.016338 0.013070 0.014704 0.008169 0.009803 \n", + "1 0.018043 0.018043 0.024058 0.018043 0.018043 0.015036 0.015036 \n", + "2 0.021441 0.025015 0.021441 0.035735 0.053603 0.039309 0.028588 \n", + "3 0.018397 0.015769 0.015769 0.015769 0.013141 0.013141 0.021025 \n", + "4 0.013070 0.013070 0.016338 0.013070 0.014704 0.008169 0.009803 \n", + "5 0.017845 0.013384 0.022306 0.026768 0.013384 0.004461 0.011153 \n", + "6 0.013113 0.012020 0.008742 0.004371 0.012020 0.007649 0.004371 \n", + "7 0.010865 0.006519 0.015211 0.008692 0.010865 0.013038 0.008692 \n", + "\n", + " 7 8 9 ... 285 286 287 288 \\\n", + "0 0.009803 0.016338 0.008169 ... 0.004901 0.011437 0.003268 0.120902 \n", + "1 0.009022 0.024058 0.009022 ... 0.006014 0.021050 0.009022 0.117281 \n", + "2 0.021441 0.014294 0.010721 ... 0.017868 0.007147 0.007147 0.092912 \n", + "3 0.026282 0.010513 0.010513 ... 0.007884 0.007884 0.005256 0.084101 \n", + "4 0.009803 0.016338 0.008169 ... 0.004901 0.011437 0.003268 0.120902 \n", + "5 0.011153 0.006692 0.008923 ... 0.006692 0.011153 0.008923 0.111531 \n", + "6 0.008742 0.004371 0.006556 ... 0.005464 0.006556 0.002185 0.105993 \n", + "7 0.004346 0.008692 0.017384 ... 
0.019557 0.015211 0.002173 0.089091 \n", + "\n", + " 289 290 291 292 293 294 \n", + "0 0.040845 0.003268 0.001634 0.004901 0.006535 0.003268 \n", + "1 0.024058 0.003007 0.003007 0.009022 0.012029 0.006014 \n", + "2 0.021441 0.003574 0.003574 0.010721 0.007147 0.007147 \n", + "3 0.026282 0.005256 0.005256 0.007884 0.007884 0.005256 \n", + "4 0.040845 0.003268 0.001634 0.004901 0.006535 0.003268 \n", + "5 0.006692 0.006692 0.002231 0.006692 0.002231 0.002231 \n", + "6 0.004371 0.003278 0.001093 0.005464 0.002185 0.002185 \n", + "7 0.002173 0.002173 0.002173 0.004346 0.002173 0.002173 \n", + "\n", + "[8 rows x 295 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e349f960-4a18-4237-b780-98a912fbfb16", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8, 295)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f8a5160b-d739-40a7-80e8-f6672d1ed129", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tim/predictive_analytics/DATA301_final_project/envs/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. 
Please use get_feature_names_out instead.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + } + ], + "source": [ + "# we need labels for columns and rows\n", + "# get_feature_names_out() replaces get_feature_names(), which was\n", + "# deprecated in scikit-learn 1.0 and removed in 1.2\n", + "\n", + "wordnames = vectorizer.get_feature_names_out() # columns\n", + "titles = ['ch1', 'ch2', 'ch3', 'ch4', 'ch5', 'ch6', 'ch7', 'ch8'] # rows\n", + "\n", + "df = pd.DataFrame(text_frequencies, columns = wordnames, index = titles)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "43b51e60-ad52-41b1-97a7-c5a98403f937", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
10111213141516171819...undiqueunumususututivenitventumverovivulneribus
ch10.0130700.0130700.0163380.0130700.0147040.0081690.0098030.0098030.0163380.008169...0.0049010.0114370.0032680.1209020.0408450.0032680.0016340.0049010.0065350.003268
ch20.0180430.0180430.0240580.0180430.0180430.0150360.0150360.0090220.0240580.009022...0.0060140.0210500.0090220.1172810.0240580.0030070.0030070.0090220.0120290.006014
ch30.0214410.0250150.0214410.0357350.0536030.0393090.0285880.0214410.0142940.010721...0.0178680.0071470.0071470.0929120.0214410.0035740.0035740.0107210.0071470.007147
ch40.0183970.0157690.0157690.0157690.0131410.0131410.0210250.0262820.0105130.010513...0.0078840.0078840.0052560.0841010.0262820.0052560.0052560.0078840.0078840.005256
ch50.0130700.0130700.0163380.0130700.0147040.0081690.0098030.0098030.0163380.008169...0.0049010.0114370.0032680.1209020.0408450.0032680.0016340.0049010.0065350.003268
ch60.0178450.0133840.0223060.0267680.0133840.0044610.0111530.0111530.0066920.008923...0.0066920.0111530.0089230.1115310.0066920.0066920.0022310.0066920.0022310.002231
ch70.0131130.0120200.0087420.0043710.0120200.0076490.0043710.0087420.0043710.006556...0.0054640.0065560.0021850.1059930.0043710.0032780.0010930.0054640.0021850.002185
ch80.0108650.0065190.0152110.0086920.0108650.0130380.0086920.0043460.0086920.017384...0.0195570.0152110.0021730.0890910.0021730.0021730.0021730.0043460.0021730.002173
\n", + "

8 rows × 295 columns

\n", + "
" + ], + "text/plain": [ + " 10 11 12 13 14 15 16 \\\n", + "ch1 0.013070 0.013070 0.016338 0.013070 0.014704 0.008169 0.009803 \n", + "ch2 0.018043 0.018043 0.024058 0.018043 0.018043 0.015036 0.015036 \n", + "ch3 0.021441 0.025015 0.021441 0.035735 0.053603 0.039309 0.028588 \n", + "ch4 0.018397 0.015769 0.015769 0.015769 0.013141 0.013141 0.021025 \n", + "ch5 0.013070 0.013070 0.016338 0.013070 0.014704 0.008169 0.009803 \n", + "ch6 0.017845 0.013384 0.022306 0.026768 0.013384 0.004461 0.011153 \n", + "ch7 0.013113 0.012020 0.008742 0.004371 0.012020 0.007649 0.004371 \n", + "ch8 0.010865 0.006519 0.015211 0.008692 0.010865 0.013038 0.008692 \n", + "\n", + " 17 18 19 ... undique unum usus \\\n", + "ch1 0.009803 0.016338 0.008169 ... 0.004901 0.011437 0.003268 \n", + "ch2 0.009022 0.024058 0.009022 ... 0.006014 0.021050 0.009022 \n", + "ch3 0.021441 0.014294 0.010721 ... 0.017868 0.007147 0.007147 \n", + "ch4 0.026282 0.010513 0.010513 ... 0.007884 0.007884 0.005256 \n", + "ch5 0.009803 0.016338 0.008169 ... 0.004901 0.011437 0.003268 \n", + "ch6 0.011153 0.006692 0.008923 ... 0.006692 0.011153 0.008923 \n", + "ch7 0.008742 0.004371 0.006556 ... 0.005464 0.006556 0.002185 \n", + "ch8 0.004346 0.008692 0.017384 ... 
0.019557 0.015211 0.002173 \n", + "\n", + " ut uti venit ventum vero vi vulneribus \n", + "ch1 0.120902 0.040845 0.003268 0.001634 0.004901 0.006535 0.003268 \n", + "ch2 0.117281 0.024058 0.003007 0.003007 0.009022 0.012029 0.006014 \n", + "ch3 0.092912 0.021441 0.003574 0.003574 0.010721 0.007147 0.007147 \n", + "ch4 0.084101 0.026282 0.005256 0.005256 0.007884 0.007884 0.005256 \n", + "ch5 0.120902 0.040845 0.003268 0.001634 0.004901 0.006535 0.003268 \n", + "ch6 0.111531 0.006692 0.006692 0.002231 0.006692 0.002231 0.002231 \n", + "ch7 0.105993 0.004371 0.003278 0.001093 0.005464 0.002185 0.002185 \n", + "ch8 0.089091 0.002173 0.002173 0.002173 0.004346 0.002173 0.002173 \n", + "\n", + "[8 rows x 295 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "2a231cde-34d1-4444-a681-93fcef31f5d3", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ch1 0.070254\n", + "ch2 0.075180\n", + "ch3 0.039309\n", + "ch4 0.094613\n", + "ch5 0.070254\n", + "Name: caesar, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['caesar'].head() # how often does this word appear in each text?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e907cbac-dd41-4959-9602-43aa1dcd0805", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiwAAAGoCAYAAABlvr66AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAiFUlEQVR4nO3df1TUZf6/8efwcyyVCgvBRcQsxbVyG/YUJrmV4vor3bXN2l21FDtEZUJmopYLnaIt85ClcFTIOieVVtvdLE4LWZormEfkWFtsZqF4lJHQDSxPIDCfP/o632ZnUIcF5ma4HufMOc177pm5X6fIy/cMMxaHw+EQAACAwQJ8vQEAAIALIVgAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYLwgX2+go7S2tur48ePq06ePLBaLr7cDAAAugsPh0OnTpxUVFaWAgLbPo/hNsBw/flzR0dG+3gYAAGiHo0eP6mc/+1mbt/tNsPTp00fSjwP37dvXx7sBAAAXo6GhQdHR0c4/x9viN8Fy7mWgvn37EiwAAHQzF3o7B2+6BQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgvCBfbwBA1xm0+F2fPffh5yb57LkBdH+cYQEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMZrV7CsWbNGsbGxslqtstls2rVr13nX79y5UzabTVarVYMHD1ZeXp7bmpycHA0dOlS9evVSdHS00tLS9MMPP7RnewAAwM94HSyFhYVasGCBli5dqoqKCiUmJmrChAmqrq72uL6qqkoTJ05UYmKiKioqtGTJEs2fP19bt251rnnjjTe0ePFiLV++XJWVlcrPz1dhYaEyMjLaPxkAAPAbFofD4fDmDjfddJNuvPFG5ebmOo/FxcVp2rRpys7Odlv/xBNP6O2331ZlZaXzWEpKig4cOKCysjJJ0sMPP6zKykpt377dueaxxx7T3r17L3j25pyGhgaFhYWpvr5effv29WYkoMcYtPhdnz334ecm+ey5AZjrYv/89uoMS1NTk8rLy5WUlORyPCkpSaWlpR7vU1ZW5rZ+/Pjx2rdvn86ePStJGj16tMrLy7V3715J0tdff62ioiJNmtT2/+AaGxvV0NDgcgEAAP4pyJvFdXV1amlpUUREhMvxiIgI2e12j/ex2+0e1zc3N6uurk6RkZG655579M0332j06NFyOBxqbm7Wgw8+qMWLF7e5l+zsbGVmZnqzfQAA0E216023FovF5brD4XA7dqH1Pz2+Y8cOPfPM
M1qzZo3279+vt956S++8846efvrpNh8zIyND9fX1zsvRo0fbMwoAAOgGvDrD0q9fPwUGBrqdTamtrXU7i3JO//79Pa4PCgpSeHi4JOnJJ5/UzJkzlZycLEm67rrr9P333+uBBx7Q0qVLFRDg3lWhoaEKDQ31ZvsAAKCb8uoMS0hIiGw2m0pKSlyOl5SUaNSoUR7vk5CQ4La+uLhY8fHxCg4OliSdOXPGLUoCAwPlcDjk5XuCAQCAH/L6JaH09HStX79eBQUFqqysVFpamqqrq5WSkiLpx5dqZs2a5VyfkpKiI0eOKD09XZWVlSooKFB+fr4WLlzoXDNlyhTl5uZq8+bNqqqqUklJiZ588kndeeedCgwM7IAxAQBAd+bVS0KSNGPGDJ08eVJZWVmqqanRiBEjVFRUpJiYGElSTU2Ny2eyxMbGqqioSGlpaVq9erWioqK0atUqTZ8+3blm2bJlslgsWrZsmY4dO6Yrr7xSU6ZM0TPPPNMBIwIAgO7O689hMRWfwwJcGJ/DAsA0nfI5LAAAAL5AsAAAAOMRLAAAwHgECwAAMB7BAgAAjEewAAAA4xEsAADAeAQLAAAwHsECAACMR7AAAADjESwAAMB4Xn/5IfwL3y0DAOgOOMMCAACMR7AAAADjESwAAMB4BAsAADAeb7oFAMAP+PsvUXCGBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9va/5//P1bLoGejJ9voPvjDAsAADAewQIAAIxHsAAAAOMRLAAAwHgECwAAMB7BAgAAjEewAAAA4xEsAADAeAQLAAAwHsECAACMR7AAAADjESwAAMB4BAsAADAewQIAAIxHsAAAAOMRLAAAwHgECwAAMB7BAgAAjEewAAAA4xEsAADAeAQLAAAwHsECAACMR7AAAADjESwAAMB4BAsAADAewQIAAIxHsAAAAOMRLAAAwHgECwAAMB7BAgAAjEewAAAA4xEsAADAeAQLAAAwXruCZc2aNYqNjZXVapXNZtOuXbvOu37nzp2y2WyyWq0aPHiw8vLy3NZ8++23euihhxQZGSmr1aq4uDgVFRW1Z3sAAMDPeB0shYWFWrBggZYuXaqKigolJiZqwoQJqq6u9ri+qqpKEydOVGJioioqKrRkyRLNnz9fW7duda5pamrSuHHjdPjwYW3ZskVffPGF1q1bpwEDBrR/MgAA4DeCvL3DypUrNXfuXCUnJ0uScnJy9I9//EO5ubnKzs52W5+Xl6eBAwcqJydHkhQXF6d9+/ZpxYoVmj59uiSpoKBAp06dUmlpqYKDgyVJMTEx7Z0JAAD4Ga/OsDQ1Nam8vFxJSUkux5OSklRaWurxPmVlZW7rx48fr3379uns2bOSpLffflsJCQl66KGHFBERoREjRujZZ59VS0tLm3tpbGxUQ0ODywUAAPgnr4Klrq5OLS0tioiIcDkeEREhu93u8T52u93j+ubmZtXV1UmSvv76a23ZskUtLS0qKirSsmXL9OKLL+qZZ55pcy/Z2dkKCwtzXqKjo70ZBQAAdCPtetOtxWJxue5wONyOXWj9T4+3trbqqquu0tq1a2Wz2XTPPfdo6dKlys3NbfMxMzIyVF9f77wcPXq0PaMAAIBuwKv3sPTr10+BgYFuZ1Nqa2vdzqKc079/f4/rg4KCFB4eLkmKjIxUcHCwAgMDnWvi4uJkt9vV1NSkkJAQt8cNDQ1VaGioN9sHAADdlFdnWEJCQmSz2VRSUuJyvKSkRKNGjfJ4n4SEBLf1xcXFio+Pd77B9pZbbtGhQ4fU2trqXHPw4EFFRkZ6jBUAANCzeP2SUHp6utavX6+CggJVVlYqLS1N1dXVSklJkfTjSzWzZs1yrk9JSdGRI0eUnp6uyspKFRQUKD8/XwsXLnSuefDBB3Xy5Ek9+uijOnjwoN599109++yzeuihhzpgRAAA
0N15/WvNM2bM0MmTJ5WVlaWamhqNGDFCRUVFzl9DrqmpcflMltjYWBUVFSktLU2rV69WVFSUVq1a5fyVZkmKjo5WcXGx0tLSdP3112vAgAF69NFH9cQTT3TAiAAAoLvzOlgkKTU1VampqR5v27Bhg9uxMWPGaP/+/ed9zISEBO3Zs6c92wEAAH6O7xICAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxmvXJ90CAGCqQYvf9dlzH35uks+e299xhgUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGK9dwbJmzRrFxsbKarXKZrNp165d512/c+dO2Ww2Wa1WDR48WHl5eW2u3bx5sywWi6ZNm9aerQEAAD/kdbAUFhZqwYIFWrp0qSoqKpSYmKgJEyaourra4/qqqipNnDhRiYmJqqio0JIlSzR//nxt3brVbe2RI0e0cOFCJSYmej8JAADwW14Hy8qVKzV37lwlJycrLi5OOTk5io6OVm5ursf1eXl5GjhwoHJychQXF6fk5GTNmTNHK1ascFnX0tKiP/zhD8rMzNTgwYPbNw0AAPBLXgVLU1OTysvLlZSU5HI8KSlJpaWlHu9TVlbmtn78+PHat2+fzp496zyWlZWlK6+8UnPnzr2ovTQ2NqqhocHlAgAA/JNXwVJXV6eWlhZFRES4HI+IiJDdbvd4H7vd7nF9c3Oz6urqJEm7d+9Wfn6+1q1bd9F7yc7OVlhYmPMSHR3tzSgAAKAbadebbi0Wi8t1h8PhduxC688dP336tP74xz9q3bp16tev30XvISMjQ/X19c7L0aNHvZgAAAB0J0HeLO7Xr58CAwPdzqbU1ta6nUU5p3///h7XBwUFKTw8XJ999pkOHz6sKVOmOG9vbW39cXNBQfriiy909dVXuz1uaGioQkNDvdk+AADoprwKlpCQENlsNpWUlOg3v/mN83hJSYmmTp3q8T4JCQnatm2by7Hi4mLFx8crODhYw4YN06effupy+7Jly3T69Gm99NJLvNSDTjFo8bs+e+7Dz03y2XMDQHflVbBIUnp6umbOnKn4+HglJCRo7dq1qq6uVkpKiqQfX6o5duyYXn/9dUlSSkqKXnnlFaWnp2vevHkqKytTfn6+Nm3aJEmyWq0aMWKEy3NcdtllkuR2HAAA9ExeB8uMGTN08uRJZWVlqaamRiNGjFBRUZFiYmIkSTU1NS6fyRIbG6uioiKlpaVp9erVioqK0qpVqzR9+vSOmwIAAPg1r4NFklJTU5Wamurxtg0bNrgdGzNmjPbv33/Rj+/pMQAAQM/FdwkBAADjESwAAMB4BAsAADAewQIAAIxHsAAAAOMRLAAAwHgECwAAMB7BAgAAjEewAAAA4xEsAADAeAQLAAAwHsECAACMR7AAAADjESwAAMB4BAsAADAewQIAAIxHsAAAAOMRLAAAwHhBvt4AAKBzDFr8rs+e+/Bzk3z23PBPnGEBAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYj
WAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxmtXsKxZs0axsbGyWq2y2WzatWvXedfv3LlTNptNVqtVgwcPVl5ensvt69atU2Jioi6//HJdfvnlGjt2rPbu3duerQEAAD/kdbAUFhZqwYIFWrp0qSoqKpSYmKgJEyaourra4/qqqipNnDhRiYmJqqio0JIlSzR//nxt3brVuWbHjh2699579eGHH6qsrEwDBw5UUlKSjh071v7JAACA3/A6WFauXKm5c+cqOTlZcXFxysnJUXR0tHJzcz2uz8vL08CBA5WTk6O4uDglJydrzpw5WrFihXPNG2+8odTUVI0cOVLDhg3TunXr1Nraqu3bt7d/MgAA4De8CpampiaVl5crKSnJ5XhSUpJKS0s93qesrMxt/fjx47Vv3z6dPXvW433OnDmjs2fP6oorrmhzL42NjWpoaHC5AAAA/+RVsNTV1amlpUUREREuxyMiImS32z3ex263e1zf3Nysuro6j/dZvHixBgwYoLFjx7a5l+zsbIWFhTkv0dHR3owCAAC6kXa96dZisbhcdzgcbscutN7TcUl6/vnntWnTJr311luyWq1tPmZGRobq6+udl6NHj3ozAgAA6EaCvFncr18/BQYGup1Nqa2tdTuLck7//v09rg8KClJ4eLjL8RUrVujZZ5/V+++/r+uvv/68ewkNDVVoaKg32wcAAN2UV2dYQkJCZLPZVFJS4nK8pKREo0aN8nifhIQEt/XFxcWKj49XcHCw89gLL7ygp59+Wu+9957i4+O92RYAAPBzXr8klJ6ervXr16ugoECVlZVKS0tTdXW1UlJSJP34Us2sWbOc61NSUnTkyBGlp6ersrJSBQUFys/P18KFC51rnn/+eS1btkwFBQUaNGiQ7Ha77Ha7vvvuuw4YEQAAdHdevSQkSTNmzNDJkyeVlZWlmpoajRgxQkVFRYqJiZEk1dTUuHwmS2xsrIqKipSWlqbVq1crKipKq1at0vTp051r1qxZo6amJt11110uz7V8+XL96U9/audoAADAX3gdLJKUmpqq1NRUj7dt2LDB7diYMWO0f//+Nh/v8OHD7dkGAADoIfguIQAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxiNYAACA8QgWAABgPIIFAAAYj2ABAADGI1gAAIDxCBYAAGA8ggUAABiPYAEAAMYjWAAAgPEIFgAAYDyCBQAAGI9gAQAAxmtXsKxZs0axsbGyWq2y2WzatWvXedfv3LlTNptNVqtVgwcPVl5entuarVu3avjw4QoNDdXw4cP117/+tT1bAwAAfsjrYCksLNSCBQu0dOlSVVRUKDExURMmTFB1dbXH9VVVVZo4caISExNVUVGhJUuWaP78+dq6datzTVlZmWbMmKGZM2fqwIEDmjlzpu6++259/PHH7Z8MAAD4Da+DZeXKlZo7d66Sk5MVFxennJwcRUdHKzc3
1+P6vLw8DRw4UDk5OYqLi1NycrLmzJmjFStWONfk5ORo3LhxysjI0LBhw5SRkaE77rhDOTk57R4MAAD4jyBvFjc1Nam8vFyLFy92OZ6UlKTS0lKP9ykrK1NSUpLLsfHjxys/P19nz55VcHCwysrKlJaW5rbmfMHS2NioxsZG5/X6+npJUkNDgzcjObU2nmnX/TpCe/fcEZi76zF312PursfcXa+7zn3uvg6H47zrvAqWuro6tbS0KCIiwuV4RESE7Ha7x/vY7XaP65ubm1VXV6fIyMg217T1mJKUnZ2tzMxMt+PR0dEXO44xwnJ8vQPfYO6ehbl7FubuWTpi7tOnTyssLKzN270KlnMsFovLdYfD4XbsQuv/+7i3j5mRkaH09HTn9dbWVp06dUrh4eHnvV9naGhoUHR0tI4ePaq+fft26XP7EnMzd0/A3MzdE/hybofDodOnTysqKuq867wKln79+ikwMNDtzEdtba3bGZJz+vfv73F9UFCQwsPDz7umrceUpNDQUIWGhrocu+yyyy52lE7Rt2/fHvUf+DnM3bMwd8/C3D2Lr+Y+35mVc7x6021ISIhsNptKSkpcjpeUlGjUqFEe75OQkOC2vri4WPHx8QoODj7vmrYeEwAA9CxevySUnp6umTNnKj4+XgkJCVq7dq2qq6uVkpIi6ceXao4dO6bXX39dkpSSkqJXXnlF6enpmjdvnsrKypSfn69NmzY5H/PRRx/Vrbfeqj//+c+aOnWq/v73v+v999/XP//5zw4aEwAAdGdeB8uMGTN08uRJZWVlqaamRiNGjFBRUZFiYmIkSTU1NS6fyRIbG6uioiKlpaVp9erVioqK0qpVqzR9+nTnmlGjRmnz5s1atmyZnnzySV199dUqLCzUTTfd1AEjdr7Q0FAtX77c7SUqf8fczN0TMDdz9wTdYW6L40K/RwQAAOBjfJcQAAAwHsECAACMR7AAAADjESwAAMB4BAsAADAewQIAAIxHsABecjgcam1t9fU2utyGDRuc34oO//bll19q+/btOnTokK+3gk7S0tLicn3v3r3as2ePGhsbfbSjCyNYOtCBAwcUGBjo6210infffVfJyclatGiR/v3vf7vc9p///Ee33367j3bWeZqbm7Vs2TKNGTNGy5cvlyS98MIL6t27t3r16qXZs2erqanJx7vsOg888ICOHz/u6210muLiYjU3Nzuvb9y4USNHjtSll16qIUOGaNWqVT7cXed57rnn9MEHH0j68Wd57NixGjp0qMaNG6ehQ4dqwoQJ+vbbb327yU7Qp08fzZ07V6Wlpb7eSpc6fPiwbDabQkNDNWnSJDU0NGjcuHG6+eabNWrUKA0fPlwHDx709TY9Ilg6mD9+Dt/GjRs1depU2e12lZWV6Re/+IXeeOMN5+1NTU3auXOnD3fYOTIzM7V+/XrFx8dry5YtevDBB/Xyyy9r7dq1Wr9+vT744APl5OT4epsd7oorrvB4aW5uVkJCgvO6v5kwYYJOnTolSdq6datmzZqlW2+9VevWrdO0adO0aNEil68U8Re5ubnq16+fJGnRokU6deqUysvLdebMGe3fv1/ffvutFi5c6ONddrzvv/9eH3/8sUaPHq24uDi9+OKLqq2t9fW2Ot3ChQvVp08f/e1vf1Pv3r01ceJENTc36+jRozp27JiuueYaPfHEE77epkd80q0Xfvvb35739vr6eu3YscPtVFt3d+ONN+r+++/XI488IknasmWL7r//fuXk5Gju3Lk6ceKEoqKi/G7uq6++Wi+99JImT56sQ4cOaejQodq4caNmzJghSfrLX/6irKwsffrppz7eacfq06ePxowZo9/97nfOYw6HQ8nJycrKytKAAQMkSbNnz/bVFjtFQECA7Ha7rrrqKo0ePVp33HGHMjMznbevWLFCb775pvbu3evDXXY8q9WqL774QjExMYqNjdVrr72mW2+91Xl7eXm5pkyZ4ndn1879+66pqdH69eu1ceNG
fffdd5o8ebKSk5P161//WhaLxdfb7HBXXXWViouLNXLkSNXX1+vyyy/XRx99pNGjR0uS9u/fr4kTJ8put/t4p+44w+KFbdu26YcfflBYWJjHS+/evX29xU5x8OBBTZ482Xn9rrvu0rZt25SWlqa8vDwf7qxzHT9+XDfccIMkaciQIQoJCXFel6T4+HgdOXLEV9vrNBUVFaqtrdUHH3yg6dOna/bs2brvvvtksVg0bdo0zZ492+9i5b99+eWXmjp1qsuxO++809hT5f+LmJgY/etf/5IkWSwWBQW5fsVcYGCgvv/+e19srUvccMMNevnll1VTU+N8n9bkyZM1cOBAPfXUU77eXoc792eY9ONfTgIDA9WnTx/n7X379tWZM2d8tb3zIli8EBcXp+nTp+vVV1/1ePnp38b8Sd++fXXixAmXY7/61a+0bds2Pf7443r55Zd9tLPOFRYW5vLa/Y033ujyg93Y2OiXfwMbMmSISktL1b9/f40cOVK7d+/29Za6zOeff65PPvlEvXr1cntjdWtrq9+dRZSkefPm6fHHH9ehQ4f08MMPa+HChfrqq68kSVVVVUpLS1NSUpKPd9nx/vtnNyQkRPfee6/ef/99ffXVV7rvvvu0YcMG32yuE/385z9XQUGBJOm1115TeHi4Nm/e7Lx906ZNuvbaa321vfNz4KLdd999jtTU1DZv//zzzx2DBg3qwh11jalTpzqeeuopj7d9+OGHjksvvdQREBDQxbvqfLfddptjw4YNbd7+5ptvOmw2WxfuqOtt377dMXDgQEdGRoYjODjY8dlnn/l6S53GYrE4AgICHBaLxWGxWBw5OTkut2/cuNExfPhwH+2ucz3yyCOO4OBgx7BhwxxWq9UREBDgCAkJcQQEBDji4+MdNTU1vt5ih7NYLI4TJ06cd01ra2sX7abrvPfeew6r1eoICQlx9OrVy/HRRx85rr32Wscvf/lLx8033+wIDAx0FBYW+nqbHvEeFi80NjaqpaVFl1xyia+30qV27typ0tJSZWRkeLx9x44deu211/Tqq6928c4618GDBxUcHKzY2FiPt2/cuFFBQUG6++67u3hnXevkyZOaN2+ePvzwQ+3Zs0dDhw719ZY6xX+/vNe7d2+Fh4c7r7/++uuSpFmzZnXpvrpKZWWl3nnnHX399ddqbW1VZGSkbrnlFo0dO9YvzyRmZmbq8ccf73H/P5d+PHO2f/9+xcfHKyYmRidOnNDq1at15swZTZo0Sbfddpuvt+gRwQIAAIwXdOEl8KS1tVWHDh1SbW2t22vdP32Hvb9hbuY+h7n9D3Mz9zkmzk2wtMOePXv0+9//XkeOHHH73BWLxeKXb8yTmJu5/z/m9j/MzdznmDo3Lwm1w8iRI3XttdcqMzNTkZGRbq/vnvuVMX/D3Mz9U8ztX5ibuX/KxLkJlna49NJLdeDAAQ0ZMsTXW+lSzM3cPQFzM3dP0B3n5nNY2uGmm27qkV8Kxtw9C3P3LMzds3THuXkPy0X65JNPnP/8yCOP6LHHHpPdbtd1112n4OBgl7XXX399V2+v0zA3czM3c0vM7Q+6+9y8JHSRAgICZLFY2vxyw3O3mfpmpfZibub+KeZmbn/A3N1zbs6wXKSqqipfb8EnmLtnYe6ehbl7lu4+N2dY2iE7O1sRERGaM2eOy/GCggJ98803xn419/+KuZlbYm7m9i/M3Y3m7txP/vdPMTExjt27d7sd37Nnj19+l9A5zO2Kuf0Tc7tibv/UHefmt4TawW63KzIy0u34lVdeqZqaGh/sqGswtyvm9k/M7Yq5/VN3nJtgaYfo6Gjt3r3b7fju3bsVFRXlgx11DeZ2xdz+ibldMbd/6o5z86bbdkhOTtaCBQt09uxZ3X777ZKk7du3a9GiRXrsscd8vLvOw9zMzdzM7W+YuxvN7evXpLqj1tZWx6JFixxWq9UREBDgCAgIcFxyySWOzMxMX2+tUzE3czO3/2Ju5jZ9bn5L6H/w3Xff
qbKyUr169dI111yj0NBQX2+pSzA3c/cEzM3cPUF3mptgAQAAxuNNtwAAwHgECwAAMB7BAgAAjEewAAAA4xEsAADAeAQLAAAwHsECAACM938RZ2Vn8iTIEAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df['caesar'].plot(kind = 'bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "dc8ac837-e6e4-42a3-a249-546d14d03985", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8, 295)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# principal component analysis\n", + "\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "c4fdc6a7-7531-4e73-84ad-2e91d5f60d35", + "metadata": {}, + "outputs": [], + "source": [ + "# we will collapse our 295 components (most frequent words) down to just 2\n", + "# so that we can more easily plot the data\n", + "\n", + "pca = PCA(n_components = 2) # we want 2 components as output" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9938feed-9a13-4a67-8269-6c10bef2f3e6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tim/predictive_analytics/DATA301_final_project/envs/lib/python3.9/site-packages/sklearn/utils/validation.py:727: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. 
For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "corpus_reduced = pca.fit_transform(text_frequencies) # use the original matrix, not the dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "699fd0a8-6a56-496d-be54-7a934d95a0a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8, 2)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "corpus_reduced.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "a4c604a0-bd61-4e4a-8acb-3b87a15238d9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0.17327172, 0.0398041 ],\n", + " [-0.01712081, -0.02159975],\n", + " [-0.04462241, -0.05325602],\n", + " [ 0.0080804 , -0.08425615],\n", + " [ 0.17327172, 0.0398041 ],\n", + " [-0.09377177, -0.04113045],\n", + " [-0.07100618, -0.07465533],\n", + " [-0.12810267, 0.19528951]])" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "corpus_reduced" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "d0972829-9ffb-46bf-b6d2-925dbf79570a", + "metadata": {}, + "outputs": [], + "source": [ + "# make the matrix a dataframe\n", + "\n", + "# df_corpus_reduced = pd.DataFrame(corpus_reduced, columns = ['First Component', 'Second Component']),\n", + "\n", + "df_corpus_reduced = pd.DataFrame(corpus_reduced,\n", + " columns = ['First Component', 'Second Component'],\n", + " index = ['ch1', 'ch2', 'ch3', 'ch4', 'ch5', 'ch6', 'ch7', 'ch8'])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a27ea5bd-ef41-44d8-87e3-2dfdf5ccdd66", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
First ComponentSecond Component
ch10.1732720.039804
ch2-0.017121-0.021600
ch3-0.044622-0.053256
ch40.008080-0.084256
ch50.1732720.039804
ch6-0.093772-0.041130
ch7-0.071006-0.074655
ch8-0.1281030.195290
\n", + "
" + ], + "text/plain": [ + " First Component Second Component\n", + "ch1 0.173272 0.039804\n", + "ch2 -0.017121 -0.021600\n", + "ch3 -0.044622 -0.053256\n", + "ch4 0.008080 -0.084256\n", + "ch5 0.173272 0.039804\n", + "ch6 -0.093772 -0.041130\n", + "ch7 -0.071006 -0.074655\n", + "ch8 -0.128103 0.195290" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_corpus_reduced" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "4d706c29-b70c-4602-88e0-b87e37b8b0c4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGdCAYAAAD0e7I1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1OUlEQVR4nO3dfVyUZaL/8e/wNGgrk4YCJiKZhkg+gKlgmOaKmJZtnSNqYe2qxe52fFpPG5E/xT2ldNpWy7TNTGvzaUvb9Kxr0ulodiArZGwVczUhPAYhpgOeXUHg/v3hz/k1Diggw8PN5/163a/XzHVf9zXXdb1G58v9aDEMwxAAAIBJebV0BwAAADyJsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEzNp6U70BJqamr07bffqlOnTrJYLC3dHQAAUA+GYai8vFzdu3eXl1f999e0y7Dz7bffKjQ0tKW7AQAAGuHkyZPq0aNHveu3y7DTqVMnSZcmKyAgoIV7AwAA6qOsrEyhoaHO3/H6apdh5/Khq4CAAMIOAABtTENPQeEEZQAAYGqEHQAAYGqEHQAAYGqEnRZQUFAgi8Uiu93e0l0BAMD0miXsrFq1SuHh4fL391dMTIz27dtXZ91t27Zp7Nix6tq1qwICAhQbG6sPPvjArd7WrVsVGRkpq9WqyMhIvffee54cQov44IMPNHz4cHXq1Eldu3bVgw8+qPz8/JbuFgAAbYrHw86WLVs0d+5cpaWlKTc3V/Hx8Ro/frwKCwtrrf/xxx9r7Nix2rlzp3JycjR69Gjde++9ys3NddbJzs5WUlKSkpOTdfDgQSUnJ2vy5Mnav3+/p4fTbE6cOKFJkybp7rvvlt1u1wcffKDS0lI98MADLd01AADaFsPDhg4daqSkpLiURUREGE899VS924iMjDTS09Od7ydPnmwkJia61Bk3bpwxZcqUerXncDgMSYbD4ah3HxqjurraWLZsmdG7d2/Dz8/PCA0NNf7t3/7NyM/PNyQZW7duNUaNGmV06NDBGDBggJGVleXc9p133jF8fHyM6upqZ9n27dsNi8ViVFZWerTfAAC0Ro39/fbonp3Kykrl5OQoISHBpTwhIUFZWVn1aqOmpkbl5eXq0qWLsyw7O9utzXHjxtXZZkVFhcrKylyW5pCamqqMjAwtXLhQeXl52rhxo4KCgpzr09LStGDBAtntdvXt21dTp05VVVWVJGnIkCHy9vbWunXrVF1dLYfDoT/84Q9KSEiQr69vs/QfAAAz8OhNBUtLS1VdXe3yAy9JQUFBKi4urlcbv/3tb/W///u/mjx5srOsuLi4QW0uXbpU6enpDex9w1XXGPos/3uVlF/QDZaLWrFihVauXK
lHHnlEktS7d2/deeedKigokCQtWLBAEyZMkCSlp6erf//+On78uCIiItSrVy/t3r1b//zP/6zHH39c1dXVio2N1c6dOz0+DgAAzKRZTlC+8k6HhmHU6+6HmzZt0uLFi7VlyxZ169at0W2mpqbK4XA4l5MnTzZwBNe261CR7sz4SFPXfKo5m+1K/u02VVRUyKfH7XVuM2DAAOfrkJAQSVJJSYmkS4Fu5syZeuSRR/T5559r79698vPz0z/90z/JMIwm7z8AAGbl0T07gYGB8vb2dtvjUlJS4rZn5kpbtmzRjBkz9M477+jHP/6xy7rg4OAGtWm1WmW1WhsxgvrZdahIP3/7gH4YQSy+lz7vmT8dUnCPnkqMCnHb7oeHoy4HtZqaGknSK6+8ooCAAD3//PPOOm+//bZCQ0O1f/9+DR8+3AMjAQDAfDy6Z8fPz08xMTHKzMx0Kc/MzFRcXFyd223atEmPPvqoNm7c6DzM80OxsbFube7evfuqbXpKdY2h9B15unJfi2/n7rL4WHXhm4NK35Gn6pqG7Y35+9//Lm9vb5eyy+8vByIAAHBtHn8Q6Pz585WcnKwhQ4YoNjZWr732mgoLC5WSkiLp0iGmU6dO6a233pJ0KehMnz5dK1as0PDhw517cDp06CCbzSZJmjNnjkaOHKmMjAxNmjRJ77//vj788EN98sknnh6Om8/yv1eR44JbucXHTwHDHtTZPet0zNtH2/bYFNqxSocPH9aYMWOu2e6ECRP0u9/9TkuWLNHUqVNVXl6up59+WmFhYRo8eLAnhgIAgCl5/JydpKQkLV++XEuWLNGgQYP08ccfa+fOnQoLC5MkFRUVudxz5/e//72qqqr0y1/+UiEhIc5lzpw5zjpxcXHavHmz1q1bpwEDBmj9+vXasmWLhg0b5unhuCkpdw86l9lGTFHAHT/RuX0bNG1cnJKSkpzn5FzL3XffrY0bN+pPf/qTBg8erMTERFmtVu3atUsdOnRoqu4DAGB6FqMdnu1aVlYmm80mh8OhgICA62or++szmrrm02vW2zRruGJ733RdnwUAQHvW2N9vno11nYaGd1GIzV91XVtmkRRi89fQ8C511AAAAJ5E2LlO3l4WLbo3UpLcAs/l94vujZS317UvtQcAAE2PsNMEEqNCtPrhaAXb/F3Kg23+Wv1wdK2XnQMAgObh8aux2ovEqBCNjQx23kG5W6dLh67YowMAQMsi7DQhby8LJyEDANDKcBgLAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYWrOEnVWrVik8PFz+/v6KiYnRvn376qxbVFSkadOm6bbbbpOXl5fmzp3rVmf9+vWyWCxuy4ULFzw4CgAA0BZ5POxs2bJFc+fOVVpamnJzcxUfH6/x48ersLCw1voVFRXq2rWr0tLSNHDgwDrbDQgIUFFRkcvi7+/vqWEAAIA2yuNh58UXX9SMGTM0c+ZM9evXT8uXL1doaKhWr15da/1evXppxYoVmj59umw2W53tWiwWBQcHuywAAABX8mjYqaysVE5OjhISElzKExISlJWVdV1tnz9/XmFhYerRo4cmTpyo3NzcOutWVFSorKzMZQEAAO2DR8NOaWmpqqurFRQU5FIeFBSk4uLiRrcbERGh9evXa/v27dq0aZP8/f01YsQIHTt2rNb6S5culc1mcy6hoa
GN/mwAANC2NMsJyhaLxeW9YRhuZQ0xfPhwPfzwwxo4cKDi4+P1xz/+UX379tXLL79ca/3U1FQ5HA7ncvLkyUZ/NgAAaFt8PNl4YGCgvL293fbilJSUuO3tuR5eXl6644476tyzY7VaZbVam+zzAABA2+HRPTt+fn6KiYlRZmamS3lmZqbi4uKa7HMMw5DdbldISEiTtQkAAMzBo3t2JGn+/PlKTk7WkCFDFBsbq9dee02FhYVKSUmRdOkQ06lTp/TWW285t7Hb7ZIunYR8+vRp2e12+fn5KTIyUpKUnp6u4cOHq0+fPiorK9NLL70ku92uV155xdPDAQAAbYzHw05SUpLOnDmjJUuWqKioSFFRUdq5c6fCwsIkXbqJ4JX33Bk8eLDzdU5OjjZu3KiwsDAVFBRIks6dO6fHHntMxcXFstlsGjx4sD7++GMNHTrU08MBAABtjMUwDKOlO9HcysrKZLPZ5HA4FBAQ0NLdAQAA9dDY32+ejQUAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEyNsAMAAEytWcLOqlWrFB4eLn9/f8XExGjfvn111i0qKtK0adN02223ycvLS3Pnzq213tatWxUZGSmr1arIyEi99957Huo9AABoyzwedrZs2aK5c+cqLS1Nubm5io+P1/jx41VYWFhr/YqKCnXt2lVpaWkaOHBgrXWys7OVlJSk5ORkHTx4UMnJyZo8ebL279/vyaEAAIA2yGIYhuHJDxg2bJiio6O1evVqZ1m/fv10//33a+nSpVfddtSoURo0aJCWL1/uUp6UlKSysjL95S9/cZYlJiaqc+fO2rRp0zX7VFZWJpvNJofDoYCAgIYNCAAAtIjG/n57dM9OZWWlcnJylJCQ4FKekJCgrKysRrebnZ3t1ua4cePqbLOiokJlZWUuCwAAaB88GnZKS0tVXV2toKAgl/KgoCAVFxc3ut3i4uIGtbl06VLZbDbnEhoa2ujPBgAAbUuznKBssVhc3huG4VbmyTZTU1PlcDicy8mTJ6/rswEAQNvh48nGAwMD5e3t7bbHpaSkxG3PTEMEBwc3qE2r1Sqr1drozwMAAG2XR/fs+Pn5KSYmRpmZmS7lmZmZiouLa3S7sbGxbm3u3r37utoEAADm5NE9O5I0f/58JScna8iQIYqNjdVrr72mwsJCpaSkSLp0iOnUqVN66623nNvY7XZJ0vnz53X69GnZ7Xb5+fkpMjJSkjRnzhyNHDlSGRkZmjRpkt5//319+OGH+uSTTzw9HAAA0MZ4POwkJSXpzJkzWrJkiYqKihQVFaWdO3cqLCxM0qWbCF55z53Bgwc7X+fk5Gjjxo0KCwtTQUGBJCkuLk6bN2/WM888o4ULF6p3797asmWLhg0b5unhAACANsbj99lpjbjPDgAAbU+rvM8OAABASyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUy
PsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAU2uWsLNq1SqFh4fL399fMTEx2rdv31Xr7927VzExMfL399ctt9yiV1991WX9+vXrZbFY3JYLFy54chgAAKAN8njY2bJli+bOnau0tDTl5uYqPj5e48ePV2FhYa318/Pzdc899yg+Pl65ubl6+umnNXv2bG3dutWlXkBAgIqKilwWf39/Tw8HAAC0MRbDMAxPfsCwYcMUHR2t1atXO8v69eun+++/X0uXLnWr/+tf/1rbt2/XkSNHnGUpKSk6ePCgsrOzJV3aszN37lydO3euUX0qKyuTzWaTw+FQQEBAo9oAAADNq7G/3x7ds1NZWamcnBwlJCS4lCckJCgrK6vWbbKzs93qjxs3Tl988YUuXrzoLDt//rzCwsLUo0cPTZw4Ubm5uXX2o6KiQmVlZS4LAABoHzwadkpLS1VdXa2goCCX8qCgIBUXF9e6TXFxca31q6qqVFpaKkmKiIjQ+vXrtX37dm3atEn+/v4aMWKEjh07VmubS5culc1mcy6hoaFNMDoAANAWNMsJyhaLxeW9YRhuZdeq/8Py4cOH6+GHH9bAgQMVHx+vP/7xj+rbt69efvnlWttLTU2Vw+FwLidPnrye4QAAgDbEo2EnMDBQ3t7ebntxSkpK3PbeXBYcHFxrfR8fH9100021buPl5aU77rijzj07VqtVAQEBLgsAAGg5n3zyiSwWi7Zs2eLxz/Jo2PHz81NMTIwyMzNdyjMzMxUXF1frNrGxsW71d+/erSFDhsjX17fWbQzDkN1uV0hISNN0HAAAtLijR4+63GLGZrNJkj788MMGtePxw1jz58/X66+/rjfeeENHjhzRvHnzVFhYqJSUFEmXDjFNnz7dWT8lJUXffPON5s+fryNHjuiNN97Q2rVrtWDBAmed9PR0ffDBBzpx4oTsdrtmzJghu93ubBMAALR91dXVkqR58+Zp1KhRGjNmjCRp5MiRDWrH42EnKSlJy5cv15IlSzRo0CB9/PHH2rlzp8LCwiRJRUVFLvfcCQ8P186dO7Vnzx4NGjRIv/nNb/TSSy/pwQcfdNY5d+6cHnvsMfXr108JCQk6deqUPv74Yw0dOtTTwwEAAA1QVVWl8ePHy9fXVxaLRT4+Pho7dqxzfU5Ojjp37iyLxaIOHTrotddec67r2LGjJGn69OkKCwtz3k/Pz8+vQX3w+H12WiPuswMAQPMYNmyYPv/8c82YMUNTp07VV199pf3792vWrFmKj4+Xn5+fUlNTFRsbqxkzZqi4uFjnz5+Xv7+/CgoKFB4ertDQUJ0+fVodOnTQ2bNnW9d9dgAAQPtSWVWjtftO6P+8f0i/fS9bn332mZKTk7VmzRrdfffd+sUvfqE333zTWX/q1KlavHixxo0bp1WrVqm6ulofffSRJOlHP/qRXnzxRb377rsaM2aM80Klhp7U7NN0wwMAAO3Z0p15WrMvXzX/75hR+ZeXLjjyixxT5zajR492vr799tslSV9//bWkS1d1z5s3z/lako4fP64VK1Zo1qxZ9e4Xe3YAAMB1W7ozT7//+P8HHUnysl4652b7wW+1dGderdv98LmWXl6XYsnlE5PrcjkM1RdhBwAAXJfKqhqt2ZfvVt4hbKAk6fyXmVqzL1+VVTVN8nl13auvLhzGAgAA1+UP2QUue3Qu8/L/kayht+vvhz/SaR9fPR18VhE3XNAnn3yimTNnXrPdN998U999951uvfVWFRYWqqCgQJJ07733Nqh/hB0AAHBdvvn+73Wu6zblNzr9x8U6/2Wmfjv3A3l7ezvvl1MfzzzzjMuDwCVp5cqVdT4iqjZces6l5wAAXJe1+07oN38+cs16Cyf004z4Wxr9OY39/eacHQ
AAcF2SY3vJq+7ne0uSvCyX6rUEwg4AALgufj5emhUfftU6s+LD5efTMrGDc3YAAMB1S70nUpJc7rMjXdqjMys+3Lm+JXDODufsAADQZCqravSH7AJ98/3fFdalo5JjezXZHp3G/n6zZwcAADQZPx+v6zoJ2RM4ZwcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJgaYQcAAJhas4SdVatWKTw8XP7+/oqJidG+ffuuWn/v3r2KiYmRv7+/brnlFr366qtudbZu3arIyEhZrVZFRkbqvffe81T3AQBAG+bxsLNlyxbNnTtXaWlpys3NVXx8vMaPH6/CwsJa6+fn5+uee+5RfHy8cnNz9fTTT2v27NnaunWrs052draSkpKUnJysgwcPKjk5WZMnT9b+/fs9PRwAANDGWAzDMDz5AcOGDVN0dLRWr17tLOvXr5/uv/9+LV261K3+r3/9a23fvl1HjhxxlqWkpOjgwYPKzs6WJCUlJamsrEx/+ctfnHUSExPVuXNnbdq06Zp9Kisrk81mk8PhUEBAwPUMDwAANJPG/n57dM9OZWWlcnJylJCQ4FKekJCgrKysWrfJzs52qz9u3Dh98cUXunjx4lXr1NVmRUWFysrKXBYAANA+eDTslJaWqrq6WkFBQS7lQUFBKi4urnWb4uLiWutXVVWptLT0qnXqanPp0qWy2WzOJTQ0tLFDAgAAbUyznKBssVhc3huG4VZ2rfpXljekzdTUVDkcDudy8uTJBvUfAAC0XT6ebDwwMFDe3t5ue1xKSkrc9sxcFhwcXGt9Hx8f3XTTTVetU1ebVqtVVqu1scMAAABtmEf37Pj5+SkmJkaZmZku5ZmZmYqLi6t1m9jYWLf6u3fv1pAhQ+Tr63vVOnW1CQAA2i+P7tmRpPnz5ys5OVlDhgxRbGysXnvtNRUWFiolJUXSpUNMp06d0ltvvSXp0pVXK1eu1Pz58zVr1ixlZ2dr7dq1LldZzZkzRyNHjlRGRoYmTZqk999/Xx9++KE++eQTTw8HAAC0MR4PO0lJSTpz5oyWLFmioqIiRUVFaefOnQoLC5MkFRUVudxzJzw8XDt37tS8efP0yiuvqHv37nrppZf04IMPOuvExcVp8+bNeuaZZ7Rw4UL17t1bW7Zs0bBhwzw9HAAA0MZ4/D47rRH32QEAoO1plffZAQAAaGmEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQCSpIKCAlksFtnt9pbuCgA0KcIOgHrbs2ePJk2apJCQEN1www0aNGiQNmzY0NLdAoCrIuwAqLesrCwNGDBAW7du1Zdffqmf/exnmj59unbs2NHSXQOAOhF2gHampqZGGRkZuvXWW2W1WtWzZ089++yzzvUnTpzQ6NGj1bFjRw0cOFDZ2dnOdU8//bR+85vfKC4uTr1799bs2bOVmJio9957ryWGAgD1QtgB2pnU1FRlZGRo4cKFysvL08aNGxUUFORcn5aWpgULFshut6tv376aOnWqqqqq6mzP4XCoS5cuzdF1AGgUnnrOU8/RjpSXl6tr165auXKlZs6c6bKuoKBA4eHhev311zVjxgxJUl5envr3768jR44oIiLCrb13331XDz30kA4cOKD+/fs3yxgAtF+N/f328WCfALQC1TWGPsv/XiXlF1R6Ik8VFRUaM2ZMnfUHDBjgfB0SEiJJKikpcQs7e/bs0aOPPqo1a9YQdAC0aoQdwMR2HS
pS+o48FTkuSJIqTxdIkvYeLVF4eHit2/j6+jpfWywWSZfO8/mhvXv36t5779WLL76o6dOne6DnANB0OGcHMKldh4r087cPOIOOJPl27i6Lj1ULVmzQrkNFjWp3z549mjBhgpYtW6bHHnusqboLAB7Dnh3AhKprDKXvyNOVJ+RZfPwUMOxBnd2zTk+kd9DO52bp+zOlOnz48FUPbV12OejMmTNHDz74oIqLiyVJfn5+nKQMoNVizw5gQp/lf++yR+eHbCOmKOCOn6hg93r17x+ppKQklZSU1Kvd9evX6+9//7uWLl2qkJAQ5/LAAw80ZfcBoElxNRZXY8GE3ref0pzN9mvWWzFlkCYNutnzHQKAJtDY32/27AAm1K2Tf5PWA4C2jLADmNDQ8C4KsfnLUsd6i6QQm7+GhnOeDQDzI+wAJuTtZdGieyMlyS3wXH6/6N5IeXvVFYcAwDwIO4BJJUaFaPXD0Qq2uR6qCrb5a/XD0UqMCmmhngFA8+LSc8DEEqNCNDYy2HkH5W6dLh26Yo8OgPaEsAOYnLeXRbG9b2rpbgBAi+EwFgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDgAAMDXCDq5LQUGBLBaL7HZ7S3cFAIBaEXbQLAzD0AsvvKC+ffvKarUqNDRUzz33XEt3CwDQDnBTQTSLOXPmaPfu3XrhhRd0++23y+FwqLS0tKW7BQBoB9izg3qpqalRRkaGbr31VlmtVvXs2VPPPvusc/2JEyc0evRodezYUQMHDlR2drZz3ZEjR7R69Wq9//77uu+++xQeHq5Bgwbpxz/+cUsMBQDQzhB2UC+pqanKyMjQwoULlZeXp40bNyooKMi5Pi0tTQsWLJDdblffvn01depUVVVVSZJ27NihW265Rf/xH/+h8PBw9erVSzNnztT333/fUsMBALQjFsMwjJbuRHMrKyuTzWaTw+FQQEBAS3en1SsvL1fXrl21cuVKzZw502VdQUGBwsPD9frrr2vGjBmSpLy8PPXv319HjhxRRESEUlJStH79eg0aNEj//u//rurqas2bN0+dO3fWRx991BJDAgC0QY39/eacHdSqusZwPim79ESeKioqNGbMmDrrDxgwwPk6JCREklRSUqKIiAjV1NSooqJCb731lvr27StJWrt2rWJiYnT06FHddtttnh0MAKBdI+zAza5DRUrfkacixwVJUuXpAknS3qMlCg8Pr3UbX19f52uLxSLp0nk+0qXw4+Pj4ww6ktSvXz9JUmFhIWEHAOBRnLMDF7sOFennbx9wBh1J8u3cXRYfqxas2KBdh4oa3OaIESNUVVWlr7/+2ln2t7/9TZIUFhZ2/Z0GAOAqCDtwqq4xlL4jT1eexGXx8VPAsAd1ds86PZG+XH87dlyffvqp1q5dW692f/zjHys6Olo/+9nPlJubq5ycHD3++OMaO3asy94eAAA8gbADp8/yv3fZo/NDthFTFHDHT1Swe736949UUlKSSkpK6tWul5eXduzYocDAQI0cOVITJkxQv379tHnz5qbsPgAAteKcHTiVlNcedCTJYvGSLS5JtrgkrZgySJMG3excd+UFfTfeeKNbWffu3bV169am7TAAAPXAnh04devk36T1AABoDQg7cBoa3kUhNn9Z6lhvkRRi89fQ8C7N2S0AAK4LYQdO3l4WLbo3UpLcAs/l94vujZS3V11xCACA1oewAxeJUSFa/XC0gm2uh6qCbf5a/XC0EqNCWqhnbVtBQYEsFovsdntLdwUA2h1OUIabxKgQjY0Mdt5BuVunS4eu2KPjWUePHlVKSory8vLkcDjUvXt3TZs2TYsWLXK5aSMAoGE8umfn7NmzSk5Ols1mk81mU3Jyss6dO3fVbQzD0OLFi9W9e3d16NBBo0aN0uHDh13qjBo1ShaLxWWZMmWKB0fS/nh7WRTb+yZNGnSzYnvfRNBpBr6+vpo+fbp2796to0ePavny5VqzZo0WLVrU0l0DgDbNo2
Fn2rRpstvt2rVrl3bt2iW73a7k5OSrbvP888/rxRdf1MqVK/X5558rODhYY8eOVXl5uUu9WbNmqaioyLn8/ve/9+RQgHqpqalRRkaGbr31VlmtVvXs2VPPPvusc/2JEyc0evRodezYUQMHDlR2drZz3S233KKf/vSnGjhwoMLCwnTffffpoYce0r59+1piKABgGh47jHXkyBHt2rVLn376qYYNGyZJWrNmjWJjY+t8+KNhGFq+fLnS0tL0wAMPSJLefPNNBQUFaePGjXr88ceddTt27Kjg4GBPdR9olNTUVK1Zs0a/+93vdOedd6qoqEhfffWVc31aWppeeOEF9enTR2lpaZo6daqOHz8uHx/3f4rHjx/Xrl27nP8WAACN47E9O9nZ2bLZbM6gI0nDhw+XzWZTVlZWrdvk5+eruLhYCQkJzjKr1aq77rrLbZsNGzYoMDBQ/fv314IFC9z2/PxQRUWFysrKXBagKVTXGMr++ozet5/ShwcLtGLFCj3//PN65JFH1Lt3b915552aOXOms/6CBQs0YcIE9e3bV+np6frmm290/Phxlzbj4uLk7++vPn36KD4+XkuWLGnuYQGAqXhsz05xcbG6devmVt6tWzcVFxfXuY0kBQUFuZQHBQXpm2++cb5/6KGHFB4eruDgYB06dEipqak6ePCgMjMza2136dKlSk9Pb+xQgFpd+XT4im+PqqKiQj49bq9zmwEDBjhfh4RcurKtpKREERERzvItW7aovLxcBw8e1L/+67/qhRde0JNPPumhUQCA+TU47CxevPiaweHzzz+XJFks7ie1GoZRa/kPXbn+ym1mzZrlfB0VFaU+ffpoyJAhOnDggKKjo93aS01N1fz5853vy8rKFBoaetU+AFdz+enwP3wohsXXKkl65k+HFNyjZ62X6f/wqqrL3+mamhqXOpe/m5GRkaqurtZjjz2mX/3qV/L29m7iUQBA+9DgsPPEE09c88qnXr166csvv9R3333ntu706dNue24uu3wOTnFxsfOvXunSX751bSNJ0dHR8vX11bFjx2oNO1arVVar9ap9BuqrrqfD+3buLouPVRe+Oaj0Hb00NjL4uq9iMwxDFy9edHvWGACg/hocdgIDAxUYGHjNerGxsXI4HPrss880dOhQSdL+/fvlcDgUFxdX6zaXD01lZmZq8ODBkqTKykrt3btXGRkZdX7W4cOHdfHiRZeABHhKXU+Ht/j4KWDYgzq7Z52Oefto2x6bQjtW6fDhwxozZsw1292wYYN8fX11++23y2q1KicnR6mpqUpKSqr1BGYAQP147H/Qfv36KTExUbNmzXJeFv7YY49p4sSJLldiRUREaOnSpfrJT34ii8WiuXPn6rnnnlOfPn3Up08fPffcc+rYsaOmTZsmSfr666+1YcMG3XPPPQoMDFReXp5+9atfafDgwRoxYoSnhgM4Xe3p8LYRU2Tx8ta5fRs07YOV6t49RCkpKfVq18fHRxkZGfrb3/4mwzAUFhamX/7yl5o3b15TdR0A2iWP/rm4YcMGzZ4923l11X333aeVK1e61Dl69KgcDofz/ZNPPql//OMf+sUvfqGzZ89q2LBh2r17tzp16iRJ8vPz03/+539qxYoVOn/+vEJDQzVhwgQtWrSIcxrQLK721HeLxUu2uCTZ4pK0adZwxfa+ybnuykNRN954o0tZUlKSkpKSmr7DANDOWYx2eDJAWVmZbDabHA6HAgICWro7aGOqawzdmfGRih0X3M7bkS49NDXY5q9Pfn03d54GgCbU2N9vHgQKNBBPhweAtoWwAzQCT4cHgLaDSzyARuLp8ADQNhB2gOtw+enwAIDWi8NYAADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7MIWCggJZLBbZ7faW7goAoJUh7KDdWLx4sSwWi9tyww03tHTXAAAexOMi0G4sWLBAKSkpLmVjxozRHXfc0UI9AgA0B/
bsoE2pqalRRkaGbr31VlmtVvXs2VPPPvusc/2JEyc0evRodezYUQMHDlR2drZz3Y9+9CMFBwc7l++++055eXmaMWNGSwwFANBMCDtoU1JTU5WRkaGFCxcqLy9PGzduVFBQkHN9WlqaFixYILvdrr59+2rq1Kmqqqqqta3XX39dffv2VXx8fHN1HwDQAiyGYRgt3YnmVlZWJpvNJofDoYCAgJbuDuqpvLxcXbt21cqVKzVz5kyXdQUFBQoPD9frr7/u3FOTl5en/v3768iRI4qIiHCpX1FRoZCQED311FN68sknm20MAIDGa+zvN+fsoFWrrjH0Wf73Kim/oNITeaqoqNCYMWPqrD9gwADn65CQEElSSUmJW9jZtm2bysvLNX36dM90HADQahB20GrtOlSk9B15KnJckCRVni6QJO09WqLw8PBat/H19XW+tlgski6d53Ol119/XRMnTlRwcHAT9xoA0NoQdtAq7TpUpJ+/fUA/PMbq27m7LD5WLVixQcE9eioxKqRRbefn5+u//uu/tH379qbpLACgVeMEZbQ61TWG0nfk6cqTySw+fgoY9qDO7lmnJ9KX62/HjuvTTz/V2rVrG9T+G2+8oZCQEI0fP77pOg0AaLUIO2h1Psv/3nno6kq2EVMUcMdPVLB7vfr3j1RSUpJKSkrq3XZNTY3Wr1+vRx99VN7e3k3VZQBAK8ZhLLQ6JeW1Bx1Jsli8ZItLki0uSSumDNKkQTc71115YeGNN97oVubl5aWTJ082bYcBAK0ae3bQ6nTr5N+k9dB28cwzAE2BsINWZ2h4F4XY/GWpY71FUojNX0PDuzRnt9AGHD9+XJ06ddKNN97Y0l0B0IoQdtDqeHtZtOjeSElyCzyX3y+6N1LeXnXFIbRHFy9e1NSpU7kjNgA3hB20SolRIVr9cLSCba6HqoJt/lr9cHSjLztH63Q9zzy77JlnnlFERIQmT57cnF0H0AZwgjJarcSoEI2NDHbeQblbp0uHrtijYz6pqalas2aNfve73+nOO+9UUVGRvvrqK+f6tLQ0vfDCC+rTp4/S0tI0depUHT9+XD4+l/4L++ijj/TOO+/Ibrdr27ZtLTUMAK0UYQetmreXRbG9b2rpbsADLj8K5JviUi1fvkIvv/yyHnnkEUlS7969deedd6qgoECStGDBAk2YMEGSlJ6erv79++v48eOKiIjQmTNn9Oijj+rtt9/mWXcAakXYAdDsfvgokIpvj6qyskKrj3VQz0NFtR6ivNozz2bNmqVp06Zp5MiRzdZ/AG0L5+wAaFaXHwVy+caRFl+rJOl0eYV+/vYB7TpU5LbN1Z559tFHH+mFF16Qj4+PfHx8NGPGDDkcDvn4+OiNN97w9HAAtAHs2QHQbGp7FMjlZ57945uD8r0xWOk78jQ2Mrje52ZlZ2erurra+f79999XRkaGsrKydPPNN19lSwDtBWEHQLOp7VEgl595dm7POlm8fVR5c6Te2LZbXo7/0ZgxY67ZZr9+/Vzef/HFF/Ly8lJUVFST9h1A20XYAdBs6noUiG3EFFm8vHVu3wZVn/9eaduDNG/2L5u5dwDMymJc+fCgdqCsrEw2m00Oh4OrN4BmlP31GU1d8+k1622aNZyr8AC4aezvNycoA2g2PAoEQEsg7ABoNjwKBEBLIOwAaFY8CgRAc+MEZQDNjkeBAGhOhB0ALYJHgQBoLhzGAgAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAAptYu76BsGIakS4+KBwAAbcPl3+3Lv+P11S7DTnl5uSQpNDS0hXsCAAAaqry8XDabrd71LUZD45EJ1NTU6Ntvv1WnTp1ksbTPBw+WlZUpNDRUJ0+eVEBAQEt3p1VjruqPuaof5qn+mKv6aw9zZRiGysvL1b17d3l51f9MnHa5Z8fLy0s9evRo6W60CgEBAab9R9HUmKv6Y67qh3mqP+aq/sw+Vw3Zo3MZJy
gDAABTI+wAAABTI+y0U1arVYsWLZLVam3prrR6zFX9MVf1wzzVH3NVf8xV3drlCcoAAKD9YM8OAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcKOSZ09e1bJycmy2Wyy2WxKTk7WuXPnrrrNtm3bNG7cOAUGBspischut7vVqaio0L/8y78oMDBQN9xwg+677z79z//8j2cG0UwaM1eGYWjx4sXq3r27OnTooFGjRunw4cMudUaNGiWLxeKyTJkyxYMjaXqrVq1SeHi4/P39FRMTo3379l21/t69exUTEyN/f3/dcsstevXVV93qbN26VZGRkbJarYqMjNR7773nqe43q6aeq/Xr17t9fywWiy5cuODJYTSLhsxVUVGRpk2bpttuu01eXl6aO3durfXM+L1q6nky83fqmgyYUmJiohEVFWVkZWUZWVlZRlRUlDFx4sSrbvPWW28Z6enpxpo1awxJRm5urludlJQU4+abbzYyMzONAwcOGKNHjzYGDhxoVFVVeWgknteYuVq2bJnRqVMnY+vWrcZf//pXIykpyQgJCTHKysqcde666y5j1qxZRlFRkXM5d+6cp4fTZDZv3mz4+voaa9asMfLy8ow5c+YYN9xwg/HNN9/UWv/EiRNGx44djTlz5hh5eXnGmjVrDF9fX+Pdd9911snKyjK8vb2N5557zjhy5Ijx3HPPGT4+Psann37aXMPyCE/M1bp164yAgACX709RUVFzDcljGjpX+fn5xuzZs40333zTGDRokDFnzhy3Omb8Xnlinsz6naoPwo4J5eXlGZJc/qFnZ2cbkoyvvvrqmtvn5+fXGnbOnTtn+Pr6Gps3b3aWnTp1yvDy8jJ27drVZP1vTo2Zq5qaGiM4ONhYtmyZs+zChQuGzWYzXn31VWfZXXfdVet/OG3F0KFDjZSUFJeyiIgI46mnnqq1/pNPPmlERES4lD3++OPG8OHDne8nT55sJCYmutQZN26cMWXKlCbqdcvwxFytW7fOsNlsTd7XltbQufqhuv5NmfF75Yl5Mut3qj44jGVC2dnZstlsGjZsmLNs+PDhstlsysrKanS7OTk5unjxohISEpxl3bt3V1RU1HW125IaM1f5+fkqLi52mQer1aq77rrLbZsNGzYoMDBQ/fv314IFC1ReXu6ZgTSxyspK5eTkuIxRkhISEuqcl+zsbLf648aN0xdffKGLFy9etU5b/f5InpsrSTp//rzCwsLUo0cPTZw4Ubm5uU0/gGbUmLmqD7N9rzw1T5L5vlP1RdgxoeLiYnXr1s2tvFu3biouLr6udv38/NS5c2eX8qCgoOtqtyU1Zq4ulwcFBbmUXzkPDz30kDZt2qQ9e/Zo4cKF2rp1qx544IEm7L3nlJaWqrq6+ppj/KHi4uJa61dVVam0tPSqddrq90fy3FxFRERo/fr12r59uzZt2iR/f3+NGDFCx44d88xAmkFj5qo+zPa98tQ8mfE7VV/t8qnnbdXixYuVnp5+1Tqff/65JMlisbitMwyj1vLr5al2r0dzzNWV66/cZtasWc7XUVFR6tOnj4YMGaIDBw4oOjr6mmNoDa41xvrUv7K8oW22FU09V8OHD9fw4cOd60eMGKHo6Gi9/PLLeumll5qq2y3CE98BM36vmnpMZv5OXQthpw154oknrnk1T69evfTll1/qu+++c1t3+vRpt78UGiI4OFiVlZU6e/asy96dkpISxcXFNbpdT/DkXAUHB0u69NdkSEiIs7ykpOSq8xsdHS1fX18dO3as1YedwMBAeXt7u/0VebUxBgcH11rfx8dHN91001XrXM/3sqV5aq6u5OXlpTvuuKNN/xXemLmqD7N9rzw1T1cyw3eqvjiM1YYEBgYqIiLiqou/v79iY2PlcDj02WefObfdv3+/HA7HdYWSmJgY+fr6KjMz01lWVFSkQ4cOtbqw48m5Cg8PV3BwsMs8VFZWau/evVedh8OHD+vixYsuAam18vPzU0xMjMsYJSkzM7POMc
bGxrrV3717t4YMGSJfX9+r1mlt35+G8NRcXckwDNnt9jbx/alLY+aqPsz2vfLUPF3JDN+pemuJs6LheYmJicaAAQOM7OxsIzs727j99tvdLqe+7bbbjG3btjnfnzlzxsjNzTX+/Oc/G5KMzZs3G7m5uS6XJqakpBg9evQwPvzwQ+PAgQPG3XffbYpLzxs6V8uWLTNsNpuxbds2469//asxdepUl0vPjx8/bqSnpxuff/65kZ+fb/z5z382IiIijMGDB7eZubp86evatWuNvLw8Y+7cucYNN9xgFBQUGIZhGE899ZSRnJzsrH/5cup58+YZeXl5xtq1a90up/7v//5vw9vb21i2bJlx5MgRY9myZW3+EmHD8MxcLV682Ni1a5fx9ddfG7m5ucZPf/pTw8fHx9i/f3+zj68pNXSuDMMwcnNzjdzcXCMmJsaYNm2akZubaxw+fNi53ozfK0/Mk1m/U/VB2DGpM2fOGA899JDRqVMno1OnTsZDDz1knD171qWOJGPdunXO9+vWrTMkuS2LFi1y1vnHP/5hPPHEE0aXLl2MDh06GBMnTjQKCwubZ1Ae0pi5qqmpMRYtWmQEBwcbVqvVGDlypPHXv/7Vub6wsNAYOXKk0aVLF8PPz8/o3bu3MXv2bOPMmTPNNKqm8corrxhhYWGGn5+fER0dbezdu9e57pFHHjHuuusul/p79uwxBg8ebPj5+Rm9evUyVq9e7dbmO++8Y9x2222Gr6+vERERYWzdutXTw2gWTT1Xc+fONXr27Gn4+fkZXbt2NRISEoysrKzmGIrHNXSuavt/KSwszKWOGb9XTT1PZv5OXYvFMP7fWXEAAAAmxDk7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1P4vDtdVFmkZxl0AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plot the dataframe\n", + "\n", + "# ax = df_corpus_reduced.plot(kind = scatter, x = 'First Component', y = 'Second Component')\n", + "\n", + "# plt.scatter(x = df_corpus_reduced['First Component'], y = df_corpus_reduced['Second Component'])\n", + "\n", + "plt.scatter(x = df_corpus_reduced['First Component'], y = df_corpus_reduced['Second Component'])\n", + "\n", + "# annotate the points\n", + "\n", + "for title, points in df_corpus_reduced.iterrows():\n", + " plt.annotate(title, points)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b88561d-14ff-40f3-b9d8-b4905550daf4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}