From f7d05fa34df8581ffb8b5443c730ec0da31cb33f Mon Sep 17 00:00:00 2001 From: spike Date: Thu, 30 Mar 2017 20:10:01 +0200 Subject: [PATCH] commit project 3 --- .gitattributes | 3 + ...dlnd_tv_script_generation-checkpoint.ipynb | 1350 ++ __pycache__/helper.cpython-35.pyc | Bin 0 -> 1811 bytes __pycache__/problem_unittests.cpython-35.pyc | Bin 0 -> 10543 bytes checkpoint | 2 + dlnd_tv_script_generation.html | 13645 ++++++++++++++++ dlnd_tv_script_generation.ipynb | 1809 ++ helper.py | 55 + ...ts.out.tfevents.1490895533.ip-172-31-18-64 | Bin 0 -> 582955 bytes problem_unittests.py | 296 + 10 files changed, 17160 insertions(+) create mode 100644 .gitattributes create mode 100644 .ipynb_checkpoints/dlnd_tv_script_generation-checkpoint.ipynb create mode 100644 __pycache__/helper.cpython-35.pyc create mode 100644 __pycache__/problem_unittests.cpython-35.pyc create mode 100644 checkpoint create mode 100644 dlnd_tv_script_generation.html create mode 100644 dlnd_tv_script_generation.ipynb create mode 100644 helper.py create mode 100644 logs/1/events.out.tfevents.1490895533.ip-172-31-18-64 create mode 100644 problem_unittests.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..5698bc4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +save.* filter=lfs diff=lfs merge=lfs -text +*.p filter=lfs diff=lfs merge=lfs -text +**/data filter=lfs diff=lfs merge=lfs -text diff --git a/.ipynb_checkpoints/dlnd_tv_script_generation-checkpoint.ipynb b/.ipynb_checkpoints/dlnd_tv_script_generation-checkpoint.ipynb new file mode 100644 index 0000000..0cbb481 --- /dev/null +++ b/.ipynb_checkpoints/dlnd_tv_script_generation-checkpoint.ipynb @@ -0,0 +1,1350 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# TV Script Generation\n", + "In this project, you'll generate your own [Simpsons](https://en.wikipedia.org/wiki/The_Simpsons) TV scripts using RNNs. You'll be using part of the [Simpsons dataset](https://www.kaggle.com/wcukierski/the-simpsons-by-the-data) of scripts from 27 seasons. The Neural Network you'll build will generate a new TV script for a scene at [Moe's Tavern](https://simpsonswiki.com/wiki/Moe's_Tavern).\n", + "## Get the Data\n", + "The data is already provided for you. You'll be using a subset of the original dataset. It consists of only the scenes in Moe's Tavern. This doesn't include other versions of the tavern, like \"Moe's Cavern\", \"Flaming Moe's\", \"Uncle Moe's Family Feed-Bag\", etc.." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import helper\n", + "\n", + "data_dir = './data/simpsons/moes_tavern_lines.txt'\n", + "text = helper.load_data(data_dir)\n", + "# Ignore notice, since we don't use it for analysing the data\n", + "text = text[81:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Explore the Data\n", + "Play around with `view_sentence_range` to view different parts of the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset Stats\n", + "Roughly the number of unique words: 11492\n", + "Number of scenes: 262\n", + "Average number of sentences in each scene: 15.248091603053435\n", + "Number of lines: 4257\n", + "Average number of words in each line: 11.50434578341555\n", + "\n", + "The sentences 0 to 10:\n", + "Moe_Szyslak: (INTO PHONE) Moe's Tavern. Where the elite meet to drink.\n", + "Bart_Simpson: Eh, yeah, hello, is Mike there? Last name, Rotch.\n", + "Moe_Szyslak: (INTO PHONE) Hold on, I'll check. (TO BARFLIES) Mike Rotch. Mike Rotch. Hey, has anybody seen Mike Rotch, lately?\n", + "Moe_Szyslak: (INTO PHONE) Listen you little puke. One of these days I'm gonna catch you, and I'm gonna carve my name on your back with an ice pick.\n", + "Moe_Szyslak: What's the matter Homer? You're not your normal effervescent self.\n", + "Homer_Simpson: I got my problems, Moe. Give me another one.\n", + "Moe_Szyslak: Homer, hey, you should not drink to forget your problems.\n", + "Barney_Gumble: Yeah, you should only drink to enhance your social skills.\n", + "\n", + "\n" + ] + } + ], + "source": [ + "view_sentence_range = (0, 10)\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import numpy as np\n", + "\n", + "print('Dataset Stats')\n", + "print('Roughly the number of unique words: {}'.format(len({word: None for word in text.split()})))\n", + "scenes = text.split('\\n\\n')\n", + "print('Number of scenes: {}'.format(len(scenes)))\n", + "sentence_count_scene = [scene.count('\\n') for scene in scenes]\n", + "print('Average number of sentences in each scene: {}'.format(np.average(sentence_count_scene)))\n", + "\n", + "sentences = [sentence for scene in scenes for sentence in scene.split('\\n')]\n", + "print('Number of lines: {}'.format(len(sentences)))\n", + "word_count_sentence = [len(sentence.split()) for sentence in sentences]\n", + "print('Average number of words in each line: {}'.format(np.average(word_count_sentence)))\n", + "\n", + "print()\n", + "print('The sentences {} to {}:'.format(*view_sentence_range))\n", + "print('\\n'.join(text.split('\\n')[view_sentence_range[0]:view_sentence_range[1]]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Implement Preprocessing Functions\n", + "The first thing to do to any dataset is preprocessing. Implement the following preprocessing functions below:\n", + "- Lookup Table\n", + "- Tokenize Punctuation\n", + "\n", + "### Lookup Table\n", + "To create a word embedding, you first need to transform the words to ids. 
In this function, create two dictionaries:\n", + "- Dictionary to go from the words to an id, we'll call `vocab_to_int`\n", + "- Dictionary to go from the id to word, we'll call `int_to_vocab`\n", + "\n", + "Return these dictionaries in the following tuple `(vocab_to_int, int_to_vocab)`" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import problem_unittests as tests\n", + "\n", + "def create_lookup_tables(text):\n", + " \"\"\"\n", + " Create lookup tables for vocabulary\n", + " :param text: The text of tv scripts split into words\n", + " :return: A tuple of dicts (vocab_to_int, int_to_vocab)\n", + " \"\"\"\n", + " vocab = set(text)\n", + " \n", + " vocab_to_int = {word: index for index, word in enumerate(vocab)}\n", + " int_to_vocab = {index: word for (word, index) in vocab_to_int.items()}\n", + " \n", + " return vocab_to_int, int_to_vocab\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_create_lookup_tables(create_lookup_tables)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Tokenize Punctuation\n", + "We'll be splitting the script into a word array using spaces as delimiters. However, punctuations like periods and exclamation marks make it hard for the neural network to distinguish between the word \"bye\" and \"bye!\".\n", + "\n", + "Implement the function `token_lookup` to return a dict that will be used to tokenize symbols like \"!\" into \"||Exclamation_Mark||\". Create a dictionary for the following symbols where the symbol is the key and value is the token:\n", + "- Period ( . )\n", + "- Comma ( , )\n", + "- Quotation Mark ( \" )\n", + "- Semicolon ( ; )\n", + "- Exclamation mark ( ! )\n", + "- Question mark ( ? )\n", + "- Left Parentheses ( ( )\n", + "- Right Parentheses ( ) )\n", + "- Dash ( -- )\n", + "- Return ( \\n )\n", + "\n", + "This dictionary will be used to token the symbols and add the delimiter (space) around it. This separates the symbols as it's own word, making it easier for the neural network to predict on the next word. Make sure you don't use a token that could be confused as a word. Instead of using the token \"dash\", try using something like \"||dash||\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def token_lookup():\n", + " \"\"\"\n", + " Generate a dict to turn punctuation into a token.\n", + " :return: Tokenize dictionary where the key is the punctuation and the value is the token\n", + " \"\"\"\n", + " \n", + " return {\n", + " '.': '||period||',\n", + " ',': '||comma||',\n", + " '\"': '||quotation_mark||',\n", + " ';': '||semicolon||',\n", + " '!': '||exclamation_mark||',\n", + " '?': '||question_mark||',\n", + " '(': '||left_parentheses',\n", + " ')': '||right_parentheses',\n", + " '--': '||dash||',\n", + " '\\n': '||return||'\n", + " }\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_tokenize(token_lookup)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Preprocess all the data and save it\n", + "Running the code cell below will preprocess all the data and save it to file." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "# Preprocess Training, Validation, and Testing Data\n", + "helper.preprocess_and_save_data(data_dir, token_lookup, create_lookup_tables)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Check Point\n", + "This is your first checkpoint. If you ever decide to come back to this notebook or have to restart the notebook, you can start from here. The preprocessed data has been saved to disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import helper\n", + "import numpy as np\n", + "import problem_unittests as tests\n", + "\n", + "int_text, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Extra hyper parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from collections import namedtuple\n", + "\n", + "hyper_params = (('embedding_size', 128),\n", + " ('lstm_layers', 2),\n", + " ('keep_prob', 0.5)\n", + " )\n", + "\n", + "\n", + "\n", + "\n", + "Hyper = namedtuple('Hyper', map(lambda x: x[0], hyper_params))\n", + "HYPER = Hyper(*list(map(lambda x: x[1], hyper_params)))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Build the Neural Network\n", + "You'll build the components necessary to build a RNN by implementing the following functions below:\n", + "- get_inputs\n", + "- get_init_cell\n", + "- get_embed\n", + "- build_rnn\n", + "- build_nn\n", + "- get_batches\n", + "\n", + "### Check the Version of TensorFlow and Access to GPU" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TensorFlow Version: 1.0.0\n", + "Default GPU Device: /gpu:0\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "from distutils.version import LooseVersion\n", + "import warnings\n", + "import tensorflow as tf\n", + "\n", + "# Check TensorFlow Version\n", + "assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer'\n", + "print('TensorFlow Version: {}'.format(tf.__version__))\n", + "\n", + "# Check for a GPU\n", + "if not tf.test.gpu_device_name():\n", + " warnings.warn('No GPU found. Please use a GPU to train your neural network.')\n", + "else:\n", + " print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Input\n", + "Implement the `get_inputs()` function to create TF Placeholders for the Neural Network. 
It should create the following placeholders:\n", + "- Input text placeholder named \"input\" using the [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder) `name` parameter.\n", + "- Targets placeholder\n", + "- Learning Rate placeholder\n", + "\n", + "Return the placeholders in the following the tuple `(Input, Targets, LearingRate)`" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_inputs():\n", + " \"\"\"\n", + " Create TF Placeholders for input, targets, and learning rate.\n", + " :return: Tuple (input, targets, learning rate)\n", + " \"\"\"\n", + " \n", + " # We use shape [None, None] to feed any batch size and any sequence length\n", + " input_placeholder = tf.placeholder(tf.int64, [None, None],name='input')\n", + " \n", + " # Targets are [batch_size, seq_length]\n", + " targets_placeholder = tf.placeholder(tf.int64, [None, None]) \n", + " \n", + " \n", + " learning_rate_placeholder = tf.placeholder(tf.float32)\n", + " return input_placeholder, targets_placeholder, learning_rate_placeholder\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_inputs(get_inputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build RNN Cell and Initialize\n", + "Stack one or more [`BasicLSTMCells`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/BasicLSTMCell) in a [`MultiRNNCell`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/MultiRNNCell).\n", + "- The Rnn size should be set using `rnn_size`\n", + "- Initalize Cell State using the MultiRNNCell's [`zero_state()`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/MultiRNNCell#zero_state) function\n", + " - Apply the name \"initial_state\" to the initial state using [`tf.identity()`](https://www.tensorflow.org/api_docs/python/tf/identity)\n", + "\n", + "Return the cell and initial state in the following tuple `(Cell, InitialState)`" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_init_cell(batch_size, rnn_size):\n", + " \"\"\"\n", + " Create an RNN Cell and initialize it.\n", + " :param batch_size: Size of batches\n", + " :param rnn_size: Size of RNNs\n", + " :return: Tuple (cell, initialize state)\n", + " \"\"\"\n", + " lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n", + " \n", + " # add a dropout wrapper\n", + " drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)\n", + " \n", + " #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)\n", + " \n", + " cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)\n", + " \n", + " initial_state = cell.zero_state(batch_size, tf.float32)\n", + " initial_state = tf.identity(initial_state, name='initial_state')\n", + " \n", + " return cell, initial_state\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_init_cell(get_init_cell)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + 
"source": [ + "### Word Embedding\n", + "Apply embedding to `input_data` using TensorFlow. Return the embedded sequence." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_embed(input_data, vocab_size, embed_dim):\n", + " \"\"\"\n", + " Create embedding for .\n", + " :param input_data: TF placeholder for text input.\n", + " :param vocab_size: Number of words in vocabulary.\n", + " :param embed_dim: Number of embedding dimensions\n", + " :return: Embedded input.\n", + " \"\"\"\n", + " embeddings = tf.Variable(\n", + " tf.random_uniform([vocab_size, embed_dim], -1.0, 1.0)\n", + " )\n", + " \n", + " embed = tf.nn.embedding_lookup(embeddings, input_data)\n", + " \n", + " return embed\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_embed(get_embed)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build RNN\n", + "You created a RNN Cell in the `get_init_cell()` function. Time to use the cell to create a RNN.\n", + "- Build the RNN using the [`tf.nn.dynamic_rnn()`](https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn)\n", + " - Apply the name \"final_state\" to the final state using [`tf.identity()`](https://www.tensorflow.org/api_docs/python/tf/identity)\n", + "\n", + "Return the outputs and final_state state in the following tuple `(Outputs, FinalState)` " + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def build_rnn(cell, inputs):\n", + " \"\"\"\n", + " Create a RNN using a RNN Cell\n", + " :param cell: RNN Cell\n", + " :param inputs: Input text data\n", + " :return: Tuple (Outputs, Final State)\n", + " \"\"\"\n", + " ## NOTES\n", + " # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] 
time_major==false (default)\n", + " \n", + " outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)\n", + " final_state = tf.identity(final_state, name='final_state')\n", + " \n", + " \n", + " return outputs, final_state\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_build_rnn(build_rnn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build the Neural Network\n", + "Apply the functions you implemented above to:\n", + "- Apply embedding to `input_data` using your `get_embed(input_data, vocab_size, embed_dim)` function.\n", + "- Build RNN using `cell` and your `build_rnn(cell, inputs)` function.\n", + "- Apply a fully connected layer with a linear activation and `vocab_size` as the number of outputs.\n", + "\n", + "Return the logits and final state in the following tuple (Logits, FinalState) " + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "logits after reshape: Tensor(\"logits:0\", shape=(128, 5, 27), dtype=float32)\n", + "Tests Passed\n" + ] + } + ], + "source": [ + "def build_nn(cell, rnn_size, input_data, vocab_size):\n", + " \"\"\"\n", + " Build part of the neural network\n", + " :param cell: RNN cell\n", + " :param rnn_size: Size of rnns\n", + " :param input_data: Input data\n", + " :param vocab_size: Vocabulary size\n", + " :return: Tuple (Logits, FinalState)\n", + " \"\"\"\n", + " \n", + " num_outputs = vocab_size\n", + " \n", + " \n", + " ## Not sure why the unit test was made without taking into \n", + " # account we are handling dynamic tensor shape that we need to infer\n", + " # at runtime, so I made an if statement just to pass the test case\n", + " #\n", + " # Some references: https://goo.gl/vD3egn\n", + " # https://goo.gl/E8vT2M \n", + " \n", + " if input_data.get_shape().as_list()[1] is not None:\n", + " batch_size = input_data.get_shape().as_list()[0]\n", + " seq_len = input_data.get_shape().as_list()[1]\n", + " \n", + " # Infer dynamic tensor shape of input\n", + " else:\n", + " input_dims = tf.shape(input_data)\n", + " batch_size = input_dims[0]\n", + " seq_len = input_dims[1]\n", + "\n", + " ###############\n", + " # This enables test passing\n", + " ###############\n", + " \n", + "\n", + " \n", + " embed = get_embed(input_data, vocab_size, HYPER.embedding_size)\n", + " \n", + " \n", + " ## NOTES\n", + " # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] 
see: time_major==false (default)\n", + " \n", + " ## Output shape\n", + " ## [batch_size, time_step, rnn_size]\n", + " raw_rnn_outputs, final_state = build_rnn(cell, embed)\n", + " \n", + " # Put outputs in rows\n", + " # make the output into [batch_size*time_step, rnn_size] for easy matmul\n", + " outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size], name='rnn_output')\n", + " \n", + " \n", + " # Question, why are we using linear activation and not softmax ?\n", + " # My Guess: because seq2seq.sequence_loss has an efficient way to calculate the loss directly from logits \n", + " with tf.variable_scope('linear_layer'):\n", + " linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.05), name='linear_w')\n", + " linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')\n", + " \n", + " logits = tf.matmul(outputs, linear_w) + linear_b\n", + " \n", + " \n", + " \n", + " # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]\n", + " # We do this beceause the loss function seq2seq.sequence_loss takes as logits a shape of [batch_size,seq_len,num_decoded_symbols]\n", + " logits = tf.reshape(logits, [batch_size, seq_len, num_outputs], name='logits')\n", + " print('logits after reshape: ', logits)\n", + " \n", + " return logits, final_state\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_build_nn(build_nn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Batches\n", + "Implement `get_batches` to create batches of input and targets using `int_text`. The batches should be a Numpy array with the shape `(number of batches, 2, batch size, sequence length)`. 
Each batch contains two elements:\n", + "- The first element is a single batch of **input** with the shape `[batch size, sequence length]`\n", + "- The second element is a single batch of **targets** with the shape `[batch size, sequence length]`\n", + "\n", + "If you can't fill the last batch with enough data, drop the last batch.\n", + "\n", + "For exmple, `get_batches([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], 2, 3)` would return a Numpy array of the following:\n", + "```\n", + "[\n", + " # First Batch\n", + " [\n", + " # Batch of Input\n", + " [[ 1 2 3], [ 7 8 9]],\n", + " # Batch of targets\n", + " [[ 2 3 4], [ 8 9 10]]\n", + " ],\n", + " \n", + " # Second Batch\n", + " [\n", + " # Batch of Input\n", + " [[ 4 5 6], [10 11 12]],\n", + " # Batch of targets\n", + " [[ 5 6 7], [11 12 13]]\n", + " ]\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_batches(int_text, batch_size, seq_length):\n", + " \"\"\"\n", + " Return batches of input and target\n", + " :param int_text: Text with the words replaced by their ids\n", + " :param batch_size: The size of batch\n", + " :param seq_length: The length of sequence\n", + " :return: Batches as a Numpy array\n", + " \"\"\"\n", + " \n", + " slice_size = batch_size * seq_length\n", + " n_batches = int(len(int_text)/slice_size)\n", + " \n", + " # input part\n", + " _inputs = np.array(int_text[:n_batches*slice_size])\n", + " \n", + " # target part\n", + " _targets = np.array(int_text[1:n_batches*slice_size + 1])\n", + " \n", + "\n", + " # Go through all inputs, targets and split them into batch_size*seq_len list of items\n", + " # [batch, batch, ...]\n", + " inputs, targets = np.split(_inputs, n_batches), np.split(_targets, n_batches)\n", + " \n", + " # concat inputs and targets\n", + " batches = np.c_[inputs, targets]\n", + " #print(batches.shape)\n", + " \n", + " # Reshape into final batches output\n", + " batches = batches.reshape((-1, 2, batch_size, seq_length))\n", + " \n", + " return batches\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_batches(get_batches)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Neural Network Training\n", + "### Hyperparameters\n", + "Tune the following parameters:\n", + "\n", + "- Set `num_epochs` to the number of epochs.\n", + "- Set `batch_size` to the batch size.\n", + "- Set `rnn_size` to the size of the RNNs.\n", + "- Set `seq_length` to the length of sequence.\n", + "- Set `learning_rate` to the learning rate.\n", + "- Set `show_every_n_batches` to the number of batches the neural network should print progress." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# Number of Epochs\n", + "num_epochs = 100\n", + "# Batch Size\n", + "batch_size = 128\n", + "# RNN Size\n", + "rnn_size = 256\n", + "# Sequence Length\n", + "seq_length = 100\n", + "# Learning Rate\n", + "learning_rate = 1e-3\n", + "# Show stats for every n number of batches\n", + "show_every_n_batches = 1\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "save_dir = './save'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build the Graph\n", + "Build the graph using the neural network you implemented." + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6779" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vocab_size" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "logits after reshape: Tensor(\"logits:0\", shape=(?, ?, 6779), dtype=float32)\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "from tensorflow.contrib import seq2seq\n", + "\n", + "train_graph = tf.Graph()\n", + "with train_graph.as_default():\n", + " vocab_size = len(int_to_vocab)\n", + " input_text, targets, lr = get_inputs()\n", + " input_data_shape = tf.shape(input_text)\n", + " cell, initial_state = get_init_cell(input_data_shape[0], rnn_size)\n", + " logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size)\n", + "\n", + " # Probabilities for generating words\n", + " probs = tf.nn.softmax(logits, name='probs')\n", + "\n", + " # Loss function\n", + " cost = seq2seq.sequence_loss(\n", + " logits,\n", + " targets,\n", + " tf.ones([input_data_shape[0], input_data_shape[1]]))\n", + "\n", + " # Optimizer\n", + " optimizer = tf.train.AdamOptimizer(lr)\n", + "\n", + " # Gradient Clipping\n", + " gradients = optimizer.compute_gradients(cost)\n", + " capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]\n", + " train_op = optimizer.apply_gradients(capped_gradients)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "batches = get_batches(int_text, batch_size, seq_length)\n", + "len(batches)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Train\n", + "Train the neural network on the preprocessed data. If you have a hard time getting a good loss, check the [forms](https://discussions.udacity.com/) to see if anyone is having the same problem." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0 Batch 0/5 train_loss = 8.828\n", + "Epoch 0 Batch 1/5 train_loss = 8.793\n", + "Epoch 0 Batch 2/5 train_loss = 8.737\n", + "Epoch 0 Batch 3/5 train_loss = 8.602\n", + "Epoch 0 Batch 4/5 train_loss = 8.298\n", + "Epoch 1 Batch 0/5 train_loss = 7.938\n", + "Epoch 1 Batch 1/5 train_loss = 7.662\n", + "Epoch 1 Batch 2/5 train_loss = 7.364\n", + "Epoch 1 Batch 3/5 train_loss = 7.164\n", + "Epoch 1 Batch 4/5 train_loss = 6.899\n", + "Epoch 2 Batch 0/5 train_loss = 6.596\n", + "Epoch 2 Batch 1/5 train_loss = 6.462\n", + "Epoch 2 Batch 2/5 train_loss = 6.309\n", + "Epoch 2 Batch 3/5 train_loss = 6.330\n", + "Epoch 2 Batch 4/5 train_loss = 6.250\n", + "Epoch 3 Batch 0/5 train_loss = 6.055\n", + "Epoch 3 Batch 1/5 train_loss = 6.048\n", + "Epoch 3 Batch 2/5 train_loss = 6.012\n", + "Epoch 3 Batch 3/5 train_loss = 6.133\n", + "Epoch 3 Batch 4/5 train_loss = 6.159\n", + "Epoch 4 Batch 0/5 train_loss = 5.996\n", + "Epoch 4 Batch 1/5 train_loss = 6.021\n", + "Epoch 4 Batch 2/5 train_loss = 6.010\n", + "Epoch 4 Batch 3/5 train_loss = 6.125\n", + "Epoch 4 Batch 4/5 train_loss = 6.156\n", + "Epoch 5 Batch 0/5 train_loss = 5.978\n", + "Epoch 5 Batch 1/5 train_loss = 5.993\n", + "Epoch 5 Batch 2/5 train_loss = 5.977\n", + "Epoch 5 Batch 3/5 train_loss = 6.081\n", + "Epoch 5 Batch 4/5 train_loss = 6.103\n", + "Epoch 6 Batch 0/5 train_loss = 5.928\n", + "Epoch 6 Batch 1/5 train_loss = 5.950\n", + "Epoch 6 Batch 2/5 train_loss = 5.938\n", + "Epoch 6 Batch 3/5 train_loss = 6.053\n", + "Epoch 6 Batch 4/5 train_loss = 6.074\n", + "Epoch 7 Batch 0/5 train_loss = 5.909\n", + "Epoch 7 Batch 1/5 train_loss = 5.937\n", + "Epoch 7 Batch 2/5 train_loss = 5.925\n", + "Epoch 7 Batch 3/5 train_loss = 6.043\n", + "Epoch 7 Batch 4/5 train_loss = 6.060\n", + "Epoch 8 Batch 0/5 train_loss = 5.896\n", + "Epoch 8 Batch 1/5 train_loss = 5.922\n", + "Epoch 8 Batch 2/5 train_loss = 5.912\n", + "Epoch 8 Batch 3/5 train_loss = 6.028\n", + "Epoch 8 Batch 4/5 train_loss = 6.049\n", + "Epoch 9 Batch 0/5 train_loss = 5.889\n", + "Epoch 9 Batch 1/5 train_loss = 5.912\n", + "Epoch 9 Batch 2/5 train_loss = 5.906\n", + "Epoch 9 Batch 3/5 train_loss = 6.020\n", + "Epoch 9 Batch 4/5 train_loss = 6.042\n", + "Epoch 10 Batch 0/5 train_loss = 5.884\n", + "Epoch 10 Batch 1/5 train_loss = 5.905\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "batches = get_batches(int_text, batch_size, seq_length)\n", + "\n", + "with tf.Session(graph=train_graph) as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + "\n", + " for epoch_i in range(num_epochs):\n", + " state = sess.run(initial_state, {input_text: batches[0][0]})\n", + "\n", + " for batch_i, (x, y) in enumerate(batches):\n", + " feed = {\n", + " input_text: x,\n", + " targets: y,\n", + " initial_state: state,\n", + " lr: learning_rate}\n", + " train_loss, state, _ = sess.run([cost, final_state, train_op], feed)\n", + "\n", + " # Show every batches\n", + " if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:\n", + " print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(\n", + " epoch_i,\n", + " batch_i,\n", + " len(batches),\n", + " train_loss))\n", + "\n", + " # Save Model\n", + " saver = tf.train.Saver()\n", + " saver.save(sess, save_dir)\n", + " print('Model Trained and Saved')" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Save Parameters\n", + "Save `seq_length` and `save_dir` for generating a new TV script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "# Save parameters for checkpoint\n", + "helper.save_params((seq_length, save_dir))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Checkpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import tensorflow as tf\n", + "import numpy as np\n", + "import helper\n", + "import problem_unittests as tests\n", + "\n", + "_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()\n", + "seq_length, load_dir = helper.load_params()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Implement Generate Functions\n", + "### Get Tensors\n", + "Get tensors from `loaded_graph` using the function [`get_tensor_by_name()`](https://www.tensorflow.org/api_docs/python/tf/Graph#get_tensor_by_name). Get the tensors using the following names:\n", + "- \"input:0\"\n", + "- \"initial_state:0\"\n", + "- \"final_state:0\"\n", + "- \"probs:0\"\n", + "\n", + "Return the tensors in the following tuple `(InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)` " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def get_tensors(loaded_graph):\n", + " \"\"\"\n", + " Get input, initial state, final state, and probabilities tensor from \n", + " :param loaded_graph: TensorFlow graph loaded from file\n", + " :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)\n", + " \"\"\"\n", + " # TODO: Implement Function\n", + " return None, None, None, None\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_tensors(get_tensors)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Choose Word\n", + "Implement the `pick_word()` function to select the next word using `probabilities`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "def pick_word(probabilities, int_to_vocab):\n", + " \"\"\"\n", + " Pick the next word in the generated text\n", + " :param probabilities: Probabilites of the next word\n", + " :param int_to_vocab: Dictionary of word ids as the keys and words as the values\n", + " :return: String of the predicted word\n", + " \"\"\"\n", + " # TODO: Implement Function\n", + " return None\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_pick_word(pick_word)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Generate TV Script\n", + "This will generate the TV script for you. Set `gen_length` to the length of TV script you want to generate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "gen_length = 200\n", + "# homer_simpson, moe_szyslak, or Barney_Gumble\n", + "prime_word = 'moe_szyslak'\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "loaded_graph = tf.Graph()\n", + "with tf.Session(graph=loaded_graph) as sess:\n", + " # Load saved model\n", + " loader = tf.train.import_meta_graph(load_dir + '.meta')\n", + " loader.restore(sess, load_dir)\n", + "\n", + " # Get Tensors from loaded model\n", + " input_text, initial_state, final_state, probs = get_tensors(loaded_graph)\n", + "\n", + " # Sentences generation setup\n", + " gen_sentences = [prime_word + ':']\n", + " prev_state = sess.run(initial_state, {input_text: np.array([[1]])})\n", + "\n", + " # Generate sentences\n", + " for n in range(gen_length):\n", + " # Dynamic Input\n", + " dyn_input = [[vocab_to_int[word] for word in gen_sentences[-seq_length:]]]\n", + " dyn_seq_length = len(dyn_input[0])\n", + "\n", + " # Get Prediction\n", + " probabilities, prev_state = sess.run(\n", + " [probs, final_state],\n", + " {input_text: dyn_input, initial_state: prev_state})\n", + " \n", + " pred_word = pick_word(probabilities[dyn_seq_length-1], int_to_vocab)\n", + "\n", + " gen_sentences.append(pred_word)\n", + " \n", + " # Remove tokens\n", + " tv_script = ' '.join(gen_sentences)\n", + " for key, token in token_dict.items():\n", + " ending = ' ' if key in ['\\n', '(', '\"'] else ''\n", + " tv_script = tv_script.replace(' ' + token.lower(), key)\n", + " tv_script = tv_script.replace('\\n ', '\\n')\n", + " tv_script = tv_script.replace('( ', '(')\n", + " \n", + " print(tv_script)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# The TV Script is Nonsensical\n", + "It's ok if the TV script doesn't make any sense. We trained on less than a megabyte of text. In order to get good results, you'll have to use a smaller vocabulary or get more data. Luckly there's more data! As we mentioned in the begging of this project, this is a subset of [another dataset](https://www.kaggle.com/wcukierski/the-simpsons-by-the-data). We didn't have you train on all the data, because that would take too long. However, you are free to train your neural network on all the data. 
After you complete the project, of course.\n", + "# Submitting This Project\n", + "When submitting this project, make sure to run all the cells before saving the notebook. Save the notebook file as \"dlnd_tv_script_generation.ipynb\" and save it as a HTML file under \"File\" -> \"Download as\". Include the \"helper.py\" and \"problem_unittests.py\" files in your submission." + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + }, + "toc": { + "colors": { + "hover_highlight": "#DAA520", + "running_highlight": "#FF0000", + "selected_highlight": "#FFD700" + }, + "moveMenuLeft": true, + "nav_menu": { + "height": "511px", + "width": "251px" + }, + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 4, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + }, + "widgets": { + "state": {}, + "version": "1.1.2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/__pycache__/helper.cpython-35.pyc b/__pycache__/helper.cpython-35.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3903f88da3113b8501cd041efff6e60a7ef74fd GIT binary patch literal 1811 zcmbVM&2Ah;5U!q?{q;mxc0e%_&>=uHQfwm-5)zh$6pItG2#%C!MYN1(dhM}iXC~ck zJ6XIZ^Ls)(29Lrkw8V*19)L?ueAVmq8k7^Qo!aW|>guYmzUnt_+~~Lae?I;@BKnK2 zjR5-R_{?L796v!N(cnrKG!T>rl!x>^)}cg{x4a%w-ln|cbu4x%?|HpNd7ttnufssl zUJ}26ciPPa|6w2s>fyVO&wK`9iH-yv1$0hyA!sVdhIEdb1ziO6>ncsL70^*c7ev#L z9wDR$Uq3z%;XItkzQp*lPII}F+SDj3N4g%%FN#w6EhqGD(sDs40AjlHGcT<_onZo?!!mQ#mZ$cb+ zK%Oi!NBB-bYrh5bv`u@yNM0m?3pPG<(OIqYM7a30&eHv%t%pTrXI4w_^s>KI7RF}v zxY@qV?)k1BP3|>U5f0NTADZ-3`Pyx4m2gy=Z7B1DlXzjh(YWAjKk50Bx>jM;*tiay zm1(A2dsORjYF(?W&y;p8)0BmE?WV|%!L=wq88^C(2$J3)W~P9}L+0urw)L^9hGktJ zpEQG;8R%-&<#cG%{ZfI|7ErqW%rY-B>%wF8+_n6Ii;*{{?{RQlKZE@YE1{Qht7b&p ztwl2bPMuB6O$dtOa5?CSHStami#S+;rX|)h-kcAn@O^w{4Pw^Y7Ej0gCAkm+?Ac4A z$+wHnwTDVBxS?|SP^Seqmt-^~Dj~J9C%Tfb9Lu7T`>D+im67#GZh3uZiiz5mwU)rd zc-Q}_Pc*J`IV`Y^#($}N!zhO2Sy#j~3mmzD$bjB0``tVtE zEp}hWCC}N}-aVMlL;e8z|0~p;MWL>R>tESd1vw#>#V)&-8hNc%jN9J#p93@5+si8*!g24d*icx*clS$dZ%l5 z+uPGUsp_#aV=ssB9S5gyC&bgi!M%aP(@6*kBm@#K@Wuo1(h?Gg2hLAC@Pbw#@%z5& z?&%qC{^*F9ovy8}`s%B%s=ogviGH3vYqz zXj~NDA+9%s_muFKxDJ5B!aKtC1>r3VZ-wgsJ}SIbt{);oZ#LH+gKDwSw33nv!!EsK zA6nQYJbD2|%nN&Dfpmdk_Qt4jOX^rV*Iliphw+uZ^27LJp~?W~V*k>aov0_zbX^sC zXFMtUrvr()A8wzC@153N<@e*$+cK2Ojr}M*(^t`EAbXub=*KZ(Z}o@CN{115^q||7 zT6cOV+-+GU%7{c^DXMoZs^c?*r-7%0M}G+rApNc-9-*|U{C?TCu=G7kJha5!5;dju za#k1Ag|)tCi#=QX7En~gLtDgE@yHhXS450c0xnDZx{bI~J0 zP}#-sKVz7(xI0VjQvkr9j;&MI-8mu&Eqf&q*9pnpM%Gdm4@=@{Yo@c;l|fcb5PnPC{y zmcx8Uh%JZ)v-WRbpE2(|t>_Iv?A64>nrP^M;^A{NoOY;2OF;<%SocHJ~P(P`i zTkbunLg)65RMLreq$2}AmQGK~*oh;@1NZOd!!Noj?%ecyeI13)n>)@>x;xH}41&n< zwR6qiWfVYk)zz^Rx;^RKh~n-}j-WXI<`>n2EzmAxa#W|z{6o&Eq*yc0?%+M?xZ>3Hr?gNUB99fhIm zbX^e4BizuNXjk{JFGI5|&<&)@*>pkneLvm-b&lT!e!n~3ui|oV@3_z_w8K3YYCt)c zp+uDPZZtTF4GW{#APghba|1_iZAr|hyE2Ti;9x6X9uLb8;R>jZiKH^lxhCLMKsEK=|KL7+z`?nPa9vlB-hXj)<Eh@WJ}MY*oMj$;{BJ|G(uzBWRsx8*PwuRO9GY{ z6};n{c5Ic!@7Usls<=6J44S3HZoQP0>8ummC-0$AB+NioFyDUtYyi#Fje7lauMrVs z@n^;8G`RP#3~x*J0&0gWE}wWmZr9N}YaOzmw~pKDS>VdWfMzF` z@)92XhbX39%3p;S!7e4-LdZl6Z>=mmxM#LFV!RGGa1b2w&R9MkmBe0|-Aa^j%HZR` zHI(Gaj)G81(u>$5qN5J|+=Yw7}8H3{J^-0Gs7G_f-^b!{WJpnR72Vwyd0Nka~`4 
z$5AvJYM$y8FsMZoNtt9YsVFzxmTHMw?69BILXRg_N2sO3$khrBB2T_qY0trwb~X*k z?L|Ize(Pv|B+~{hKj;{7YtJ(Ta}~I@T}wxf6^`MbLkw0x{fn4Q6AfS+9DdbWv6f29 zV3{TB8C!iB?Kum~;f3gD7Whv{{WJ^wgJt2^!3AIeoIFm^#C0a3GY;=|osO2G4LJ1( zJHRP$#ZgxH`1`~Pi1fe>xqF)*Gx!Gh0SunNShTNFP%zm9pjGe{*sP)doUt_s8{Wrw z%INz??wjX6W;b+$mEbQUphQ$k(}330Er*+5!Jv7I$UDS?5VAfc)^enXJI4c=KKADU zjyUuHj3B8Bj2*^!gfRlPh>-((x+eLAt~qa!ymxrv8;Nzw)fX>|A+tPY}HM-O|OQJ`Nz z>*(;;!`(2t9~K+Cnc*diLehH^kNygZ)bO4l?g8r&Hz4GJ&ZvqQ=9a^sqMdD)jJTC_ ztdcSj)3g=Ic~XKYn34=d+#S(Wsd+uYk`hO|GK9b#Xi|?FL&h);Vh&hInx(}Sabg{m z58!pIqmr&;mDYhM4209^u-lN5-?a8E$0+}lVr%yxdIw{A|8LW&uL4tXLiKXiVCJQR z;4Qj0o^o`7%sp(Yv;JUj9j<6M7Jc!7W}ahg@#rF9 z;QCLGYSZZKpQYL=6>C(m3ZF|FJW=YKCAF0Ele%dp=cT*y>2kkkfdtr2OVZ&3z_wK zX5a-@|Er}HYXu&`l7*+VhT2ML&Eo%PSt(VmdB8TTBj{PP)h}Rd&V(do&&AI?g<^{wQA%9YU%(@5?b_0YJ zA(yN%&a8_dA^%vo@pRs_XyLQ0jtjO4CMX5iVt_gWz;NdDV~sUl<98T{Ksb^H~HfKn3uZpuk5B6Ar8gK0RF|={R;79EO{s#V$ylzoc(;9s_qm9rO z0jJ=;GD+vBLO188T{%1ID;9N<{G?3Oviv8rC`fqOd$Zud(E~4Tx%2`A`UQIt zf@~fL>p8%7Geb+dova@V>))a>?brRWu&4>H7E0zvvRo{+G~@k@wF?Q& z+QwsnMUK`lA?rwvh+z!2(ODFl+HCbI);(%pLDrpUwD8jHMzo}6dN!sg-V5Aib1vky5OHG#-B+n6E?gJ+@Q5ukhL9{r^~X#StpTVPmz;nn?CGrPrS0e?~2(I-{V2j1n;k3Jxh3 z_*l&t&+jD_u1+7Efue)s?!U(Tnmi&pv_Di{U{L>vAxE*;GF3!&DoiQGg=8p3t-%%z&;ukgIogi zNUma7p`fAB!$ConLB9FGMmzFjh7;Xe>> zvi96Av&n8*L=tX{%)HrEl$knXs)BZ#-59fyQD1l7@BAW(gaq=O}_DX|AxEZ@q+oeeJnT-sG{Jo7-IUPBy#`F z!{7&FSOzy92}H$_SPj0QwIouJdzD2FUb30>GexekF;4UqgpabWMglJv8TRwI53l1o zkFLRt3y~lG`O%cb{nDo*{tSt0IiEPq|BEbMp#5Q+MGgo{rMgD7Pm(_M4)t;rJ=>l! zyAp=&<*`h$ADw#KR|uGdQeCHlGMegZDB9H^+V*3uDDyHcZN_2AQ2#q1H05SU*c$Mh zN#IE2C}~b9^ySieCU9Q|R4#B7-nXF&w19rg+B z#=>PDH?@nWrv%bO7Kt3q+d@?`b z`SNMdFJ+=ljgS&|Sjf3h#5wlQWvC-HdUn!3p6)(Yl!)ohXRiajyXoV`$Tu1P)b(f7 z&6!X3sy1+-#vm~J)lD8AzY73K2;!Inx9N^5tuyVZOFXg8wn?=Y6n;8_SDrGuGf3!w~V@p7@%oS)|Qp8=gog|DBISs4zH#Xv}E z5}Pm4I*(AnQ3Yo^#yQP$dp2KJ%Jjs}xumt9;?f}-YANd?6Ahl& + + +dlnd_tv_script_generation + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+
+
+

TV Script Generation

In this project, you'll generate your own Simpsons TV scripts using RNNs. You'll be using part of the Simpsons dataset of scripts from 27 seasons. The Neural Network you'll build will generate a new TV script for a scene at Moe's Tavern.

+

Get the Data

The data is already provided for you. You'll be using a subset of the original dataset. It consists of only the scenes in Moe's Tavern. This doesn't include other versions of the tavern, like "Moe's Cavern", "Flaming Moe's", "Uncle Moe's Family Feed-Bag", etc.

+ +
+
+
+
+
+
In [64]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+import helper
+
+data_dir = './data/simpsons/moes_tavern_lines.txt'
+text = helper.load_data(data_dir)
+# Ignore notice, since we don't use it for analysing the data
+text = text[81:]
+
+ +
+
+
+ +
+
+
+
+
+
+

Explore the Data

Play around with view_sentence_range to view different parts of the data.

+ +
+
+
+
+
+
In [14]:
+
+
+
view_sentence_range = (0, 10)
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+import numpy as np
+
+print('Dataset Stats')
+print('Roughly the number of unique words: {}'.format(len({word: None for word in text.split()})))
+scenes = text.split('\n\n')
+print('Number of scenes: {}'.format(len(scenes)))
+sentence_count_scene = [scene.count('\n') for scene in scenes]
+print('Average number of sentences in each scene: {}'.format(np.average(sentence_count_scene)))
+
+sentences = [sentence for scene in scenes for sentence in scene.split('\n')]
+print('Number of lines: {}'.format(len(sentences)))
+word_count_sentence = [len(sentence.split()) for sentence in sentences]
+print('Average number of words in each line: {}'.format(np.average(word_count_sentence)))
+
+print()
+print('The sentences {} to {}:'.format(*view_sentence_range))
+print('\n'.join(text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Dataset Stats
+Roughly the number of unique words: 11492
+Number of scenes: 262
+Average number of sentences in each scene: 15.248091603053435
+Number of lines: 4257
+Average number of words in each line: 11.50434578341555
+
+The sentences 0 to 10:
+Moe_Szyslak: (INTO PHONE) Moe's Tavern. Where the elite meet to drink.
+Bart_Simpson: Eh, yeah, hello, is Mike there? Last name, Rotch.
+Moe_Szyslak: (INTO PHONE) Hold on, I'll check. (TO BARFLIES) Mike Rotch. Mike Rotch. Hey, has anybody seen Mike Rotch, lately?
+Moe_Szyslak: (INTO PHONE) Listen you little puke. One of these days I'm gonna catch you, and I'm gonna carve my name on your back with an ice pick.
+Moe_Szyslak: What's the matter Homer? You're not your normal effervescent self.
+Homer_Simpson: I got my problems, Moe. Give me another one.
+Moe_Szyslak: Homer, hey, you should not drink to forget your problems.
+Barney_Gumble: Yeah, you should only drink to enhance your social skills.
+
+
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Implement Preprocessing Functions

The first thing to do to any dataset is preprocessing. Implement the following preprocessing functions below:

+
  • Lookup Table
  • Tokenize Punctuation
+

Lookup Table

To create a word embedding, you first need to transform the words to ids. In this function, create two dictionaries:

+
  • Dictionary to go from the words to an id, we'll call vocab_to_int
  • Dictionary to go from the id to word, we'll call int_to_vocab
+

Return these dictionaries in the following tuple (vocab_to_int, int_to_vocab)

+ +
+
+
+
+
+
In [65]:
+
+
+
import numpy as np
+import problem_unittests as tests
+
+def create_lookup_tables(text):
+    """
+    Create lookup tables for vocabulary
+    :param text: The text of tv scripts split into words
+    :return: A tuple of dicts (vocab_to_int, int_to_vocab)
+    """
+    vocab = set(text)
+    
+    vocab_to_int = {word: index for index, word in enumerate(vocab)}
+    int_to_vocab = {index: word for (word, index) in vocab_to_int.items()}
+    
+    return vocab_to_int, int_to_vocab
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_create_lookup_tables(create_lookup_tables)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
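A quick usage sketch of the lookup tables above (with a made-up word list, not the project data):

example_words = ['moe', 'homer', 'moe', 'bart']   # hypothetical input; the real call gets the tokenized script words
v2i, i2v = create_lookup_tables(example_words)
assert i2v[v2i['homer']] == 'homer'   # word -> id -> word round-trips
assert len(v2i) == 3                  # duplicate words collapse to a single id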
+
+

Tokenize Punctuation

We'll be splitting the script into a word array using spaces as delimiters. However, punctuation such as periods and exclamation marks makes it hard for the neural network to distinguish between the word "bye" and "bye!".

+

Implement the function token_lookup to return a dict that will be used to tokenize symbols like "!" into "||Exclamation_Mark||". Create a dictionary for the following symbols where the symbol is the key and value is the token:

+
  • Period ( . )
  • Comma ( , )
  • Quotation Mark ( " )
  • Semicolon ( ; )
  • Exclamation mark ( ! )
  • Question mark ( ? )
  • Left Parentheses ( ( )
  • Right Parentheses ( ) )
  • Dash ( -- )
  • Return ( \n )
+

This dictionary will be used to tokenize the symbols and add the delimiter (space) around them. This separates each symbol into its own word, making it easier for the neural network to predict the next word. Make sure you don't use a token that could be confused as a word. Instead of using the token "dash", try using something like "||dash||".

+ +
+
+
+
+
+
In [16]:
+
+
+
def token_lookup():
+    """
+    Generate a dict to turn punctuation into a token.
+    :return: Tokenize dictionary where the key is the punctuation and the value is the token
+    """
+    
+    return {
+        '.': '||period||',
+        ',': '||comma||',
+        '"': '||quotation_mark||',
+        ';': '||semicolon||',
+        '!': '||exclamation_mark||',
+        '?': '||question_mark||',
+        '(': '||left_parentheses||',
+        ')': '||right_parentheses||',
+        '--': '||dash||',
+        '\n': '||return||'
+    }
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_tokenize(token_lookup)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
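For intuition, here is a rough sketch of how a token dictionary like this is typically applied before splitting on spaces (the project's actual replacement happens inside helper.preprocess_and_save_data; the sample text below is made up):

tokens = token_lookup()
sample = "Hi! How are you?\n"
for symbol, token in tokens.items():
    sample = sample.replace(symbol, ' {} '.format(token))
print(sample.lower().split())
# ['hi', '||exclamation_mark||', 'how', 'are', 'you', '||question_mark||', '||return||']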
+
+

Preprocess all the data and save it

Running the code cell below will preprocess all the data and save it to file.

+ +
+
+
+
+
+
In [17]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+# Preprocess Training, Validation, and Testing Data
+helper.preprocess_and_save_data(data_dir, token_lookup, create_lookup_tables)
+
+ +
+
+
+ +
+
+
+
+
+
+

Check Point

This is your first checkpoint. If you ever decide to come back to this notebook or have to restart the notebook, you can start from here. The preprocessed data has been saved to disk.

+ +
+
+
+
+
+
In [1]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+import helper
+import numpy as np
+import problem_unittests as tests
+
+int_text, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()
+
+ +
+
+
+ +
+
+
+
+
+
+

Extra hyperparameters

+
+
+
+
+
+
In [177]:
+
+
+
from collections import namedtuple
+
+hyper_params = (('embedding_size', 128),
+                ('lstm_layers', 2),
+                ('keep_prob', 0.7)
+               )
+
+
+
+
+Hyper = namedtuple('Hyper', map(lambda x: x[0], hyper_params))
+HYPER = Hyper(*list(map(lambda x: x[1], hyper_params)))
+
+ +
+
+
+ +
+
+
+
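The namedtuple simply gives dot access to these settings in the cells below, for example:

print(HYPER.embedding_size, HYPER.lstm_layers, HYPER.keep_prob)   # the values defined above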
+
+
+

Build the Neural Network

You'll build the components necessary for an RNN by implementing the following functions below:

+
  • get_inputs
  • get_init_cell
  • get_embed
  • build_rnn
  • build_nn
  • get_batches
+

Check the Version of TensorFlow and Access to GPU

+
+
+
+
+
+
In [3]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+from distutils.version import LooseVersion
+import warnings
+import tensorflow as tf
+
+# Check TensorFlow Version
+assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer'
+print('TensorFlow Version: {}'.format(tf.__version__))
+
+# Check for a GPU
+if not tf.test.gpu_device_name():
+    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
+else:
+    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
TensorFlow Version: 1.0.0
+Default GPU Device: /gpu:0
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Input

Implement the get_inputs() function to create TF Placeholders for the Neural Network. It should create the following placeholders:

+
  • Input text placeholder named "input" using the TF Placeholder name parameter.
  • Targets placeholder
  • Learning Rate placeholder
+

Return the placeholders in the following tuple (Input, Targets, LearningRate)

+ +
+
+
+
+
+
In [225]:
+
+
+
def get_inputs():
+    """
+    Create TF Placeholders for input, targets, and learning rate.
+    :return: Tuple (input, targets, learning rate)
+    """
+    
+    # We use shape [None, None] to feed any batch size and any sequence length
+    input_placeholder = tf.placeholder(tf.int64, [None, None], name='input')
+    
+    # Targets are [batch_size, seq_length]
+    targets_placeholder = tf.placeholder(tf.int64, [None, None], name='targets') 
+    
+    
+    learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
+    return input_placeholder, targets_placeholder, learning_rate_placeholder
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_get_inputs(get_inputs)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
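For orientation, a minimal sketch of how these placeholders are fed later during training (x and y stand for one batch of inputs and targets shaped [batch_size, seq_length]):

# feed = {input_text: x, targets: y, lr: learning_rate}
# train_loss, _ = sess.run([cost, train_op], feed)
# (the full training loop also feeds initial_state; see the Train section)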
+
+
+

Build RNN Cell and Initialize

Stack one or more BasicLSTMCells in a MultiRNNCell.

+
  • The RNN size should be set using rnn_size
  • Initialize Cell State using the MultiRNNCell's zero_state() function
      • Apply the name "initial_state" to the initial state using tf.identity()
+

Return the cell and initial state in the following tuple (Cell, InitialState)

+ +
+
+
+
+
+
In [227]:
+
+
+
def get_init_cell(batch_size, rnn_size):
+    """
+    Create an RNN Cell and initialize it.
+    :param batch_size: Size of batches
+    :param rnn_size: Size of RNNs
+    :return: Tuple (cell, initialize state)
+    """
+    with tf.name_scope('RNN_layers'):
+        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
+
+        # add a dropout wrapper
+        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)
+
+        #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)
+
+        cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)
+    
+   
+    _initial_state = cell.zero_state(batch_size, tf.float32)
+    initial_state = tf.identity(_initial_state, name='initial_state')
+    
+    return cell, initial_state
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_get_init_cell(get_init_cell)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Word Embedding

Apply embedding to input_data using TensorFlow. Return the embedded sequence.

+ +
+
+
+
+
+
In [207]:
+
+
+
def get_embed(input_data, vocab_size, embed_dim):
+    """
+    Create embedding for <input_data>.
+    :param input_data: TF placeholder for text input.
+    :param vocab_size: Number of words in vocabulary.
+    :param embed_dim: Number of embedding dimensions
+    :return: Embedded input.
+    """
+    with tf.name_scope('Embedding'):
+        embeddings = tf.Variable(
+            tf.random_uniform([vocab_size, embed_dim], -1.0, 1.0)
+        )
+
+        embed = tf.nn.embedding_lookup(embeddings, input_data)
+    
+    return embed
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_get_embed(get_embed)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
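A small shape check of what get_embed returns, built in a throwaway graph with arbitrary sizes (6779 matches the vocabulary size reported later):

check_graph = tf.Graph()
with check_graph.as_default():
    ids = tf.placeholder(tf.int64, [None, None])         # [batch_size, seq_length]
    emb = get_embed(ids, vocab_size=6779, embed_dim=128)
    print(emb.get_shape().as_list())                      # [None, None, 128]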
+
+

Build RNN

You created an RNN Cell in the get_init_cell() function. Time to use the cell to create an RNN.

  • Build the RNN using tf.nn.dynamic_rnn()
      • Apply the name "final_state" to the final state using tf.identity()

Return the outputs and final state in the following tuple (Outputs, FinalState)

+ +
+
+
+
+
+
In [228]:
+
+
+
def build_rnn(cell, inputs):
+    """
+    Create a RNN using a RNN Cell
+    :param cell: RNN Cell
+    :param inputs: Input text data
+    :return: Tuple (Outputs, Final State)
+    """
+    ## NOTES
+    # dynamic_rnn infers the sequence length from dim 1 of [batch_size, max_time, ...] when time_major=False (the default)
+    with tf.name_scope('RNN_output'):
+        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
+    
+    final_state = tf.identity(final_state, name='final_state')
+    
+    
+    return outputs, final_state
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_build_rnn(build_rnn)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Build the Neural Network

Apply the functions you implemented above to:

  • Apply embedding to input_data using your get_embed(input_data, vocab_size, embed_dim) function.
  • Build RNN using cell and your build_rnn(cell, inputs) function.
  • Apply a fully connected layer with a linear activation and vocab_size as the number of outputs.

Return the logits and final state in the following tuple (Logits, FinalState)

+ +
+
+
+
+
+
In [231]:
+
+
+
def build_nn(cell, rnn_size, input_data, vocab_size):
+    """
+    Build part of the neural network
+    :param cell: RNN cell
+    :param rnn_size: Size of rnns
+    :param input_data: Input data
+    :param vocab_size: Vocabulary size
+    :return: Tuple (Logits, FinalState)
+    """
+    
+    num_outputs = vocab_size
+    
+    
+    ## The unit test feeds a tensor with a static shape, while the real graph
+    # uses dynamic shapes that are only known at runtime, so the if statement
+    # below handles both cases.
+    #
+    # Some references: https://goo.gl/vD3egn
+    #                  https://goo.gl/E8vT2M
+    
+    if input_data.get_shape().as_list()[1] is not None:
+        batch_size = input_data.get_shape().as_list()[0]
+        seq_len = input_data.get_shape().as_list()[1]
+    
+    # Infer dynamic tensor shape of input
+    else:
+        input_dims = tf.shape(input_data)
+        batch_size = input_dims[0]
+        seq_len = input_dims[1]
+
+    
+
+    
+    embed = get_embed(input_data, vocab_size, HYPER.embedding_size)
+    
+    
+    ## NOTES
+    # dynamic_rnn infers the sequence length from dim 1 of [batch_size, max_time, ...]; see time_major=False (the default)
+    
+    ## Output shape
+    ## [batch_size, time_step, rnn_size]
+    raw_rnn_outputs, final_state = build_rnn(cell, embed)
+    
+    
+    # Put outputs in rows
+    # make the output into [batch_size*time_step, rnn_size] for easy matmul
+    with tf.name_scope('sequence_reshape'):
+        outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size], name='rnn_output')
+    
+    
+    # Why a linear activation rather than softmax? seq2seq.sequence_loss expects raw
+    # logits and applies the softmax internally, which is more efficient and numerically stable.
+    with tf.name_scope('logits'):
+        
+        linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.05), name='linear_w')
+        linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')
+
+        logits = tf.matmul(outputs, linear_w) + linear_b
+    
+    
+    
+    # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]
+    # We do this because the loss function seq2seq.sequence_loss expects logits with shape [batch_size, seq_len, num_decoded_symbols]
+    with tf.name_scope('logits_reshape_to_loss'):
+        logits = tf.reshape(logits, [batch_size, seq_len, num_outputs], name='logits')
+        print('logits after reshape: ', logits)
+    
+    return logits, final_state
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_build_nn(build_nn)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
logits after reshape:  Tensor("logits_reshape_to_loss/logits:0", shape=(128, 5, 27), dtype=float32)
+Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Batches

Implement get_batches to create batches of input and targets using int_text. The batches should be a Numpy array with the shape (number of batches, 2, batch size, sequence length). Each batch contains two elements:

  • The first element is a single batch of input with the shape [batch size, sequence length]
  • The second element is a single batch of targets with the shape [batch size, sequence length]

If you can't fill the last batch with enough data, drop the last batch.

+

For example, get_batches([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], 2, 3) would return a Numpy array of the following:

+ +
[
+  # First Batch
+  [
+    # Batch of Input
+    [[ 1  2  3], [ 7  8  9]],
+    # Batch of targets
+    [[ 2  3  4], [ 8  9 10]]
+  ],
+
+  # Second Batch
+  [
+    # Batch of Input
+    [[ 4  5  6], [10 11 12]],
+    # Batch of targets
+    [[ 5  6  7], [11 12 13]]
+  ]
+]
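A quick sanity check (an editorial sketch, assuming the get_batches implementation in the cell below) confirms the documented shape for this example:

    example_batches = get_batches(list(range(1, 16)), 2, 3)
    print(example_batches.shape)  # (2, 2, 2, 3): (n_batches, input/target, batch_size, seq_length)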
+ +
+
+
+
+
+
In [233]:
+
+
+
def get_batches(int_text, batch_size, seq_length):
+    """
+    Return batches of input and target
+    :param int_text: Text with the words replaced by their ids
+    :param batch_size: The size of batch
+    :param seq_length: The length of sequence
+    :return: Batches as a Numpy array
+    """
+    
+    slice_size = batch_size * seq_length
+    n_batches = int(len(int_text)/slice_size)
+    
+    # input part
+    _inputs = np.array(int_text[:n_batches*slice_size])
+    
+    # target part
+    _targets = np.array(int_text[1:n_batches*slice_size + 1])
+    
+
+    # Split the inputs and targets into n_batches chunks of batch_size*seq_len items each
+    # [batch, batch, ...]
+    inputs, targets = np.split(_inputs, n_batches), np.split(_targets, n_batches)
+    
+    # concat inputs and targets
+    batches = np.c_[inputs, targets]
+    #print(batches.shape)
+    
+    # Reshape into final batches output
+    batches = batches.reshape((-1, 2, batch_size, seq_length))
+
+    #print(batches[0][1])
+
+    
+    return batches
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_get_batches(get_batches)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Neural Network Training

Hyperparameters

Tune the following parameters:

  • Set num_epochs to the number of epochs.
  • Set batch_size to the batch size.
  • Set rnn_size to the size of the RNNs.
  • Set seq_length to the sequence length.
  • Set learning_rate to the learning rate.
  • Set show_every_n_batches to the number of batches after which the neural network should print progress.
+ +
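As a rough sanity check (an editorial aside, not required by the project), it can help to see how many batches a given batch_size and seq_length produce, since get_batches drops the words that don't fill a whole batch:

    words_per_batch = batch_size * seq_length
    n_batches = len(int_text) // words_per_batch
    print('{} batches of {} words each'.format(n_batches, words_per_batch))
    # With the values chosen below (batch_size=128, seq_length=100) this corpus yields 5 batches,
    # which matches the "Batch 0/5" lines in the training log further down.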
+
+
+
+
+
In [234]:
+
+
+
# Number of Epochs
+num_epochs = 1000
+# Batch Size
+batch_size = 128
+# RNN Size
+rnn_size = 70
+# Sequence Length
+seq_length = 100
+# Learning Rate
+learning_rate = 1e-3
+# Show stats for every n number of batches
+show_every_n_batches = 10
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+save_dir = './save'
+
+ +
+
+
+ +
+
+
+
+
+
+

Build the Graph

Build the graph using the neural network you implemented.

+ +
+
+
+
+
+
In [235]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+from tensorflow.contrib import seq2seq
+
+train_graph = tf.Graph()
+with train_graph.as_default():
+    vocab_size = len(int_to_vocab)
+    input_text, targets, lr = get_inputs()
+    input_data_shape = tf.shape(input_text)
+    cell, initial_state = get_init_cell(input_data_shape[0], rnn_size)
+    logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size)
+
+    # Probabilities for generating words
+    probs = tf.nn.softmax(logits, name='probs')
+
+    # Loss function
+    cost = seq2seq.sequence_loss(
+        logits,
+        targets,
+        tf.ones([input_data_shape[0], input_data_shape[1]]))
+
+    # Optimizer
+    optimizer = tf.train.AdamOptimizer(lr)
+
+    # Gradient Clipping
+    gradients = optimizer.compute_gradients(cost)
+    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]
+    train_op = optimizer.apply_gradients(capped_gradients)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
logits after reshape:  Tensor("logits_reshape_to_loss/logits:0", shape=(?, ?, 6779), dtype=float32)
+
+
+
+ +
+
+ +
+
+
+
In [238]:
+
+
+
# write out the graph for tensorboard
+
+with tf.Session(graph=train_graph) as sess:
+    file_writer = tf.summary.FileWriter('./logs/1', sess.graph)
+
+ +
+
+
+ +
+
+
+
+
+
+

Train

Train the neural network on the preprocessed data. If you have a hard time getting a good loss, check the forums to see if anyone is having the same problem.

+ +
+
+
+
+
+
In [197]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+batches = get_batches(int_text, batch_size, seq_length)
+
+with tf.Session(graph=train_graph) as sess:
+    sess.run(tf.global_variables_initializer())
+
+    for epoch_i in range(num_epochs):
+        state = sess.run(initial_state, {input_text: batches[0][0]})
+
+        for batch_i, (x, y) in enumerate(batches):
+            feed = {
+                input_text: x,
+                targets: y,
+                initial_state: state,
+                lr: learning_rate}
+            train_loss, state, _ = sess.run([cost, final_state, train_op], feed)
+
+            # Show every <show_every_n_batches> batches
+            if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
+                print('Epoch {:>3} Batch {:>4}/{}   train_loss = {:.3f}'.format(
+                    epoch_i,
+                    batch_i,
+                    len(batches),
+                    train_loss))
+
+    # Save Model
+    saver = tf.train.Saver()
+    saver.save(sess, save_dir)
+    print('Model Trained and Saved')
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
5
+Epoch   0 Batch    0/5   train_loss = 8.825
+Epoch   2 Batch    0/5   train_loss = 6.441
+Epoch   4 Batch    0/5   train_loss = 6.023
+Epoch   6 Batch    0/5   train_loss = 5.927
+Epoch   8 Batch    0/5   train_loss = 5.903
+Epoch  10 Batch    0/5   train_loss = 5.883
+Epoch  12 Batch    0/5   train_loss = 5.874
+Epoch  14 Batch    0/5   train_loss = 5.858
+Epoch  16 Batch    0/5   train_loss = 5.833
+Epoch  18 Batch    0/5   train_loss = 5.794
+Epoch  20 Batch    0/5   train_loss = 5.739
+Epoch  22 Batch    0/5   train_loss = 5.682
+Epoch  24 Batch    0/5   train_loss = 5.626
+Epoch  26 Batch    0/5   train_loss = 5.572
+Epoch  28 Batch    0/5   train_loss = 5.521
+Epoch  30 Batch    0/5   train_loss = 5.471
+Epoch  32 Batch    0/5   train_loss = 5.421
+Epoch  34 Batch    0/5   train_loss = 5.365
+Epoch  36 Batch    0/5   train_loss = 5.304
+Epoch  38 Batch    0/5   train_loss = 5.244
+Epoch  40 Batch    0/5   train_loss = 5.185
+Epoch  42 Batch    0/5   train_loss = 5.124
+Epoch  44 Batch    0/5   train_loss = 5.063
+Epoch  46 Batch    0/5   train_loss = 5.003
+Epoch  48 Batch    0/5   train_loss = 4.945
+Epoch  50 Batch    0/5   train_loss = 4.891
+Epoch  52 Batch    0/5   train_loss = 4.841
+Epoch  54 Batch    0/5   train_loss = 4.794
+Epoch  56 Batch    0/5   train_loss = 4.751
+Epoch  58 Batch    0/5   train_loss = 4.710
+Epoch  60 Batch    0/5   train_loss = 4.669
+Epoch  62 Batch    0/5   train_loss = 4.638
+Epoch  64 Batch    0/5   train_loss = 4.638
+Epoch  66 Batch    0/5   train_loss = 4.589
+Epoch  68 Batch    0/5   train_loss = 4.537
+Epoch  70 Batch    0/5   train_loss = 4.501
+Epoch  72 Batch    0/5   train_loss = 4.469
+Epoch  74 Batch    0/5   train_loss = 4.436
+Epoch  76 Batch    0/5   train_loss = 4.405
+Epoch  78 Batch    0/5   train_loss = 4.375
+Epoch  80 Batch    0/5   train_loss = 4.344
+Epoch  82 Batch    0/5   train_loss = 4.363
+Epoch  84 Batch    0/5   train_loss = 4.311
+Epoch  86 Batch    0/5   train_loss = 4.274
+Epoch  88 Batch    0/5   train_loss = 4.240
+Epoch  90 Batch    0/5   train_loss = 4.211
+Epoch  92 Batch    0/5   train_loss = 4.182
+Epoch  94 Batch    0/5   train_loss = 4.155
+Epoch  96 Batch    0/5   train_loss = 4.135
+Epoch  98 Batch    0/5   train_loss = 4.107
+Epoch 100 Batch    0/5   train_loss = 4.093
+Epoch 102 Batch    0/5   train_loss = 4.053
+Epoch 104 Batch    0/5   train_loss = 4.030
+Epoch 106 Batch    0/5   train_loss = 4.002
+Epoch 108 Batch    0/5   train_loss = 3.978
+Epoch 110 Batch    0/5   train_loss = 3.951
+Epoch 112 Batch    0/5   train_loss = 3.928
+Epoch 114 Batch    0/5   train_loss = 3.902
+Epoch 116 Batch    0/5   train_loss = 3.884
+Epoch 118 Batch    0/5   train_loss = 3.862
+Epoch 120 Batch    0/5   train_loss = 3.840
+Epoch 122 Batch    0/5   train_loss = 3.814
+Epoch 124 Batch    0/5   train_loss = 3.803
+Epoch 126 Batch    0/5   train_loss = 3.775
+Epoch 128 Batch    0/5   train_loss = 3.738
+Epoch 130 Batch    0/5   train_loss = 3.714
+Epoch 132 Batch    0/5   train_loss = 3.690
+Epoch 134 Batch    0/5   train_loss = 3.665
+Epoch 136 Batch    0/5   train_loss = 3.642
+Epoch 138 Batch    0/5   train_loss = 3.619
+Epoch 140 Batch    0/5   train_loss = 3.596
+Epoch 142 Batch    0/5   train_loss = 3.577
+Epoch 144 Batch    0/5   train_loss = 3.588
+Epoch 146 Batch    0/5   train_loss = 3.561
+Epoch 148 Batch    0/5   train_loss = 3.537
+Epoch 150 Batch    0/5   train_loss = 3.494
+Epoch 152 Batch    0/5   train_loss = 3.475
+Epoch 154 Batch    0/5   train_loss = 3.444
+Epoch 156 Batch    0/5   train_loss = 3.431
+Epoch 158 Batch    0/5   train_loss = 3.403
+Epoch 160 Batch    0/5   train_loss = 3.393
+Epoch 162 Batch    0/5   train_loss = 3.371
+Epoch 164 Batch    0/5   train_loss = 3.352
+Epoch 166 Batch    0/5   train_loss = 3.323
+Epoch 168 Batch    0/5   train_loss = 3.328
+Epoch 170 Batch    0/5   train_loss = 3.281
+Epoch 172 Batch    0/5   train_loss = 3.261
+Epoch 174 Batch    0/5   train_loss = 3.238
+Epoch 176 Batch    0/5   train_loss = 3.216
+Epoch 178 Batch    0/5   train_loss = 3.197
+Epoch 180 Batch    0/5   train_loss = 3.172
+Epoch 182 Batch    0/5   train_loss = 3.169
+Epoch 184 Batch    0/5   train_loss = 3.140
+Epoch 186 Batch    0/5   train_loss = 3.136
+Epoch 188 Batch    0/5   train_loss = 3.145
+Epoch 190 Batch    0/5   train_loss = 3.106
+Epoch 192 Batch    0/5   train_loss = 3.069
+Epoch 194 Batch    0/5   train_loss = 3.038
+Epoch 196 Batch    0/5   train_loss = 3.019
+Epoch 198 Batch    0/5   train_loss = 2.995
+Epoch 200 Batch    0/5   train_loss = 2.979
+Epoch 202 Batch    0/5   train_loss = 2.960
+Epoch 204 Batch    0/5   train_loss = 2.943
+Epoch 206 Batch    0/5   train_loss = 2.963
+Epoch 208 Batch    0/5   train_loss = 2.917
+Epoch 210 Batch    0/5   train_loss = 2.898
+Epoch 212 Batch    0/5   train_loss = 2.867
+Epoch 214 Batch    0/5   train_loss = 2.863
+Epoch 216 Batch    0/5   train_loss = 2.834
+Epoch 218 Batch    0/5   train_loss = 2.809
+Epoch 220 Batch    0/5   train_loss = 2.797
+Epoch 222 Batch    0/5   train_loss = 2.774
+Epoch 224 Batch    0/5   train_loss = 2.759
+Epoch 226 Batch    0/5   train_loss = 2.732
+Epoch 228 Batch    0/5   train_loss = 2.742
+Epoch 230 Batch    0/5   train_loss = 2.704
+Epoch 232 Batch    0/5   train_loss = 2.703
+Epoch 234 Batch    0/5   train_loss = 2.663
+Epoch 236 Batch    0/5   train_loss = 2.672
+Epoch 238 Batch    0/5   train_loss = 2.638
+Epoch 240 Batch    0/5   train_loss = 2.620
+Epoch 242 Batch    0/5   train_loss = 2.595
+Epoch 244 Batch    0/5   train_loss = 2.585
+Epoch 246 Batch    0/5   train_loss = 2.563
+Epoch 248 Batch    0/5   train_loss = 2.539
+Epoch 250 Batch    0/5   train_loss = 2.534
+Epoch 252 Batch    0/5   train_loss = 2.517
+Epoch 254 Batch    0/5   train_loss = 2.497
+Epoch 256 Batch    0/5   train_loss = 2.475
+Epoch 258 Batch    0/5   train_loss = 2.463
+Epoch 260 Batch    0/5   train_loss = 2.478
+Epoch 262 Batch    0/5   train_loss = 2.450
+Epoch 264 Batch    0/5   train_loss = 2.436
+Epoch 266 Batch    0/5   train_loss = 2.417
+Epoch 268 Batch    0/5   train_loss = 2.384
+Epoch 270 Batch    0/5   train_loss = 2.363
+Epoch 272 Batch    0/5   train_loss = 2.340
+Epoch 274 Batch    0/5   train_loss = 2.323
+Epoch 276 Batch    0/5   train_loss = 2.314
+Epoch 278 Batch    0/5   train_loss = 2.302
+Epoch 280 Batch    0/5   train_loss = 2.300
+Epoch 282 Batch    0/5   train_loss = 2.300
+Epoch 284 Batch    0/5   train_loss = 2.283
+Epoch 286 Batch    0/5   train_loss = 2.246
+Epoch 288 Batch    0/5   train_loss = 2.246
+Epoch 290 Batch    0/5   train_loss = 2.210
+Epoch 292 Batch    0/5   train_loss = 2.203
+Epoch 294 Batch    0/5   train_loss = 2.185
+Epoch 296 Batch    0/5   train_loss = 2.170
+Epoch 298 Batch    0/5   train_loss = 2.150
+Epoch 300 Batch    0/5   train_loss = 2.130
+Epoch 302 Batch    0/5   train_loss = 2.132
+Epoch 304 Batch    0/5   train_loss = 2.113
+Epoch 306 Batch    0/5   train_loss = 2.083
+Epoch 308 Batch    0/5   train_loss = 2.073
+Epoch 310 Batch    0/5   train_loss = 2.060
+Epoch 312 Batch    0/5   train_loss = 2.072
+Epoch 314 Batch    0/5   train_loss = 2.081
+Epoch 316 Batch    0/5   train_loss = 2.031
+Epoch 318 Batch    0/5   train_loss = 2.007
+Epoch 320 Batch    0/5   train_loss = 2.001
+Epoch 322 Batch    0/5   train_loss = 1.987
+Epoch 324 Batch    0/5   train_loss = 1.978
+Epoch 326 Batch    0/5   train_loss = 1.963
+Epoch 328 Batch    0/5   train_loss = 1.952
+Epoch 330 Batch    0/5   train_loss = 1.932
+Epoch 332 Batch    0/5   train_loss = 1.918
+Epoch 334 Batch    0/5   train_loss = 1.898
+Epoch 336 Batch    0/5   train_loss = 1.885
+Epoch 338 Batch    0/5   train_loss = 1.872
+Epoch 340 Batch    0/5   train_loss = 1.864
+Epoch 342 Batch    0/5   train_loss = 1.867
+Epoch 344 Batch    0/5   train_loss = 1.848
+Epoch 346 Batch    0/5   train_loss = 1.821
+Epoch 348 Batch    0/5   train_loss = 1.814
+Epoch 350 Batch    0/5   train_loss = 1.788
+Epoch 352 Batch    0/5   train_loss = 1.806
+Epoch 354 Batch    0/5   train_loss = 1.790
+Epoch 356 Batch    0/5   train_loss = 1.761
+Epoch 358 Batch    0/5   train_loss = 1.745
+Epoch 360 Batch    0/5   train_loss = 1.735
+Epoch 362 Batch    0/5   train_loss = 1.718
+Epoch 364 Batch    0/5   train_loss = 1.747
+Epoch 366 Batch    0/5   train_loss = 1.726
+Epoch 368 Batch    0/5   train_loss = 1.753
+Epoch 370 Batch    0/5   train_loss = 1.703
+Epoch 372 Batch    0/5   train_loss = 1.662
+Epoch 374 Batch    0/5   train_loss = 1.643
+Epoch 376 Batch    0/5   train_loss = 1.624
+Epoch 378 Batch    0/5   train_loss = 1.617
+Epoch 380 Batch    0/5   train_loss = 1.598
+Epoch 382 Batch    0/5   train_loss = 1.613
+Epoch 384 Batch    0/5   train_loss = 1.601
+Epoch 386 Batch    0/5   train_loss = 1.584
+Epoch 388 Batch    0/5   train_loss = 1.569
+Epoch 390 Batch    0/5   train_loss = 1.557
+Epoch 392 Batch    0/5   train_loss = 1.534
+Epoch 394 Batch    0/5   train_loss = 1.534
+Epoch 396 Batch    0/5   train_loss = 1.520
+Epoch 398 Batch    0/5   train_loss = 1.547
+Epoch 400 Batch    0/5   train_loss = 1.545
+Epoch 402 Batch    0/5   train_loss = 1.521
+Epoch 404 Batch    0/5   train_loss = 1.486
+Epoch 406 Batch    0/5   train_loss = 1.469
+Epoch 408 Batch    0/5   train_loss = 1.458
+Epoch 410 Batch    0/5   train_loss = 1.442
+Epoch 412 Batch    0/5   train_loss = 1.431
+Epoch 414 Batch    0/5   train_loss = 1.410
+Epoch 416 Batch    0/5   train_loss = 1.411
+Epoch 418 Batch    0/5   train_loss = 1.412
+Epoch 420 Batch    0/5   train_loss = 1.398
+Epoch 422 Batch    0/5   train_loss = 1.417
+Epoch 424 Batch    0/5   train_loss = 1.381
+Epoch 426 Batch    0/5   train_loss = 1.355
+Epoch 428 Batch    0/5   train_loss = 1.354
+Epoch 430 Batch    0/5   train_loss = 1.338
+Epoch 432 Batch    0/5   train_loss = 1.321
+Epoch 434 Batch    0/5   train_loss = 1.326
+Epoch 436 Batch    0/5   train_loss = 1.324
+Epoch 438 Batch    0/5   train_loss = 1.314
+Epoch 440 Batch    0/5   train_loss = 1.292
+Epoch 442 Batch    0/5   train_loss = 1.279
+Epoch 444 Batch    0/5   train_loss = 1.259
+Epoch 446 Batch    0/5   train_loss = 1.283
+Epoch 448 Batch    0/5   train_loss = 1.274
+Epoch 450 Batch    0/5   train_loss = 1.251
+Epoch 452 Batch    0/5   train_loss = 1.279
+Epoch 454 Batch    0/5   train_loss = 1.249
+Epoch 456 Batch    0/5   train_loss = 1.214
+Epoch 458 Batch    0/5   train_loss = 1.196
+Epoch 460 Batch    0/5   train_loss = 1.185
+Epoch 462 Batch    0/5   train_loss = 1.174
+Epoch 464 Batch    0/5   train_loss = 1.158
+Epoch 466 Batch    0/5   train_loss = 1.195
+Epoch 468 Batch    0/5   train_loss = 1.158
+Epoch 470 Batch    0/5   train_loss = 1.145
+Epoch 472 Batch    0/5   train_loss = 1.160
+Epoch 474 Batch    0/5   train_loss = 1.123
+Epoch 476 Batch    0/5   train_loss = 1.118
+Epoch 478 Batch    0/5   train_loss = 1.103
+Epoch 480 Batch    0/5   train_loss = 1.088
+Epoch 482 Batch    0/5   train_loss = 1.089
+Epoch 484 Batch    0/5   train_loss = 1.094
+Epoch 486 Batch    0/5   train_loss = 1.092
+Epoch 488 Batch    0/5   train_loss = 1.106
+Epoch 490 Batch    0/5   train_loss = 1.053
+Epoch 492 Batch    0/5   train_loss = 1.052
+Epoch 494 Batch    0/5   train_loss = 1.046
+Epoch 496 Batch    0/5   train_loss = 1.030
+Epoch 498 Batch    0/5   train_loss = 1.021
+Epoch 500 Batch    0/5   train_loss = 1.020
+Epoch 502 Batch    0/5   train_loss = 1.046
+Epoch 504 Batch    0/5   train_loss = 1.040
+Epoch 506 Batch    0/5   train_loss = 1.026
+Epoch 508 Batch    0/5   train_loss = 0.982
+Epoch 510 Batch    0/5   train_loss = 0.969
+Epoch 512 Batch    0/5   train_loss = 0.962
+Epoch 514 Batch    0/5   train_loss = 0.946
+Epoch 516 Batch    0/5   train_loss = 0.941
+Epoch 518 Batch    0/5   train_loss = 0.951
+Epoch 520 Batch    0/5   train_loss = 0.945
+Epoch 522 Batch    0/5   train_loss = 0.952
+Epoch 524 Batch    0/5   train_loss = 0.931
+Epoch 526 Batch    0/5   train_loss = 0.905
+Epoch 528 Batch    0/5   train_loss = 0.893
+Epoch 530 Batch    0/5   train_loss = 0.881
+Epoch 532 Batch    0/5   train_loss = 0.882
+Epoch 534 Batch    0/5   train_loss = 0.871
+Epoch 536 Batch    0/5   train_loss = 0.904
+Epoch 538 Batch    0/5   train_loss = 0.893
+Epoch 540 Batch    0/5   train_loss = 0.884
+Epoch 542 Batch    0/5   train_loss = 0.864
+Epoch 544 Batch    0/5   train_loss = 0.854
+Epoch 546 Batch    0/5   train_loss = 0.854
+Epoch 548 Batch    0/5   train_loss = 0.836
+Epoch 550 Batch    0/5   train_loss = 0.816
+Epoch 552 Batch    0/5   train_loss = 0.829
+Epoch 554 Batch    0/5   train_loss = 0.813
+Epoch 556 Batch    0/5   train_loss = 0.798
+Epoch 558 Batch    0/5   train_loss = 0.808
+Epoch 560 Batch    0/5   train_loss = 0.789
+Epoch 562 Batch    0/5   train_loss = 0.791
+Epoch 564 Batch    0/5   train_loss = 0.779
+Epoch 566 Batch    0/5   train_loss = 0.765
+Epoch 568 Batch    0/5   train_loss = 0.746
+Epoch 570 Batch    0/5   train_loss = 0.746
+Epoch 572 Batch    0/5   train_loss = 0.733
+Epoch 574 Batch    0/5   train_loss = 0.733
+Epoch 576 Batch    0/5   train_loss = 0.752
+Epoch 578 Batch    0/5   train_loss = 0.727
+Epoch 580 Batch    0/5   train_loss = 0.712
+Epoch 582 Batch    0/5   train_loss = 0.711
+Epoch 584 Batch    0/5   train_loss = 0.708
+Epoch 586 Batch    0/5   train_loss = 0.695
+Epoch 588 Batch    0/5   train_loss = 0.699
+Epoch 590 Batch    0/5   train_loss = 0.688
+Epoch 592 Batch    0/5   train_loss = 0.682
+Epoch 594 Batch    0/5   train_loss = 0.703
+Epoch 596 Batch    0/5   train_loss = 0.681
+Epoch 598 Batch    0/5   train_loss = 0.672
+Epoch 600 Batch    0/5   train_loss = 0.678
+Epoch 602 Batch    0/5   train_loss = 0.657
+Epoch 604 Batch    0/5   train_loss = 0.652
+Epoch 606 Batch    0/5   train_loss = 0.627
+Epoch 608 Batch    0/5   train_loss = 0.623
+Epoch 610 Batch    0/5   train_loss = 0.633
+Epoch 612 Batch    0/5   train_loss = 0.608
+Epoch 614 Batch    0/5   train_loss = 0.614
+Epoch 616 Batch    0/5   train_loss = 0.620
+Epoch 618 Batch    0/5   train_loss = 0.610
+Epoch 620 Batch    0/5   train_loss = 0.596
+Epoch 622 Batch    0/5   train_loss = 0.596
+Epoch 624 Batch    0/5   train_loss = 0.605
+Epoch 626 Batch    0/5   train_loss = 0.574
+Epoch 628 Batch    0/5   train_loss = 0.581
+Epoch 630 Batch    0/5   train_loss = 0.571
+Epoch 632 Batch    0/5   train_loss = 0.563
+Epoch 634 Batch    0/5   train_loss = 0.582
+Epoch 636 Batch    0/5   train_loss = 0.579
+Epoch 638 Batch    0/5   train_loss = 0.562
+Epoch 640 Batch    0/5   train_loss = 0.549
+Epoch 642 Batch    0/5   train_loss = 0.540
+Epoch 644 Batch    0/5   train_loss = 0.520
+Epoch 646 Batch    0/5   train_loss = 0.515
+Epoch 648 Batch    0/5   train_loss = 0.509
+Epoch 650 Batch    0/5   train_loss = 0.509
+Epoch 652 Batch    0/5   train_loss = 0.527
+Epoch 654 Batch    0/5   train_loss = 0.524
+Epoch 656 Batch    0/5   train_loss = 0.509
+Epoch 658 Batch    0/5   train_loss = 0.523
+Epoch 660 Batch    0/5   train_loss = 0.502
+Epoch 662 Batch    0/5   train_loss = 0.477
+Epoch 664 Batch    0/5   train_loss = 0.473
+Epoch 666 Batch    0/5   train_loss = 0.463
+Epoch 668 Batch    0/5   train_loss = 0.457
+Epoch 670 Batch    0/5   train_loss = 0.455
+Epoch 672 Batch    0/5   train_loss = 0.459
+Epoch 674 Batch    0/5   train_loss = 0.475
+Epoch 676 Batch    0/5   train_loss = 0.471
+Epoch 678 Batch    0/5   train_loss = 0.455
+Epoch 680 Batch    0/5   train_loss = 0.443
+Epoch 682 Batch    0/5   train_loss = 0.456
+Epoch 684 Batch    0/5   train_loss = 0.440
+Epoch 686 Batch    0/5   train_loss = 0.421
+Epoch 688 Batch    0/5   train_loss = 0.413
+Epoch 690 Batch    0/5   train_loss = 0.405
+Epoch 692 Batch    0/5   train_loss = 0.401
+Epoch 694 Batch    0/5   train_loss = 0.404
+Epoch 696 Batch    0/5   train_loss = 0.400
+Epoch 698 Batch    0/5   train_loss = 0.428
+Epoch 700 Batch    0/5   train_loss = 0.451
+Epoch 702 Batch    0/5   train_loss = 0.426
+Epoch 704 Batch    0/5   train_loss = 0.410
+Epoch 706 Batch    0/5   train_loss = 0.422
+Epoch 708 Batch    0/5   train_loss = 0.398
+Epoch 710 Batch    0/5   train_loss = 0.377
+Epoch 712 Batch    0/5   train_loss = 0.368
+Epoch 714 Batch    0/5   train_loss = 0.358
+Epoch 716 Batch    0/5   train_loss = 0.352
+Epoch 718 Batch    0/5   train_loss = 0.349
+Epoch 720 Batch    0/5   train_loss = 0.344
+Epoch 722 Batch    0/5   train_loss = 0.346
+Epoch 724 Batch    0/5   train_loss = 0.345
+Epoch 726 Batch    0/5   train_loss = 0.337
+Epoch 728 Batch    0/5   train_loss = 0.345
+Epoch 730 Batch    0/5   train_loss = 0.348
+Epoch 732 Batch    0/5   train_loss = 0.358
+Epoch 734 Batch    0/5   train_loss = 0.346
+Epoch 736 Batch    0/5   train_loss = 0.337
+Epoch 738 Batch    0/5   train_loss = 0.329
+Epoch 740 Batch    0/5   train_loss = 0.320
+Epoch 742 Batch    0/5   train_loss = 0.323
+Epoch 744 Batch    0/5   train_loss = 0.316
+Epoch 746 Batch    0/5   train_loss = 0.304
+Epoch 748 Batch    0/5   train_loss = 0.299
+Epoch 750 Batch    0/5   train_loss = 0.292
+Epoch 752 Batch    0/5   train_loss = 0.288
+Epoch 754 Batch    0/5   train_loss = 0.289
+Epoch 756 Batch    0/5   train_loss = 0.284
+Epoch 758 Batch    0/5   train_loss = 0.290
+Epoch 760 Batch    0/5   train_loss = 0.304
+Epoch 762 Batch    0/5   train_loss = 0.311
+Epoch 764 Batch    0/5   train_loss = 0.405
+Epoch 766 Batch    0/5   train_loss = 0.390
+Epoch 768 Batch    0/5   train_loss = 0.344
+Epoch 770 Batch    0/5   train_loss = 0.320
+Epoch 772 Batch    0/5   train_loss = 0.280
+Epoch 774 Batch    0/5   train_loss = 0.265
+Epoch 776 Batch    0/5   train_loss = 0.258
+Epoch 778 Batch    0/5   train_loss = 0.252
+Epoch 780 Batch    0/5   train_loss = 0.247
+Epoch 782 Batch    0/5   train_loss = 0.243
+Epoch 784 Batch    0/5   train_loss = 0.240
+Epoch 786 Batch    0/5   train_loss = 0.237
+Epoch 788 Batch    0/5   train_loss = 0.233
+Epoch 790 Batch    0/5   train_loss = 0.231
+Epoch 792 Batch    0/5   train_loss = 0.229
+Epoch 794 Batch    0/5   train_loss = 0.225
+Epoch 796 Batch    0/5   train_loss = 0.230
+Epoch 798 Batch    0/5   train_loss = 0.226
+Epoch 800 Batch    0/5   train_loss = 0.222
+Epoch 802 Batch    0/5   train_loss = 0.237
+Epoch 804 Batch    0/5   train_loss = 0.225
+Epoch 806 Batch    0/5   train_loss = 0.225
+Epoch 808 Batch    0/5   train_loss = 0.245
+Epoch 810 Batch    0/5   train_loss = 0.227
+Epoch 812 Batch    0/5   train_loss = 0.210
+Epoch 814 Batch    0/5   train_loss = 0.206
+Epoch 816 Batch    0/5   train_loss = 0.202
+Epoch 818 Batch    0/5   train_loss = 0.198
+Epoch 820 Batch    0/5   train_loss = 0.195
+Epoch 822 Batch    0/5   train_loss = 0.192
+Epoch 824 Batch    0/5   train_loss = 0.189
+Epoch 826 Batch    0/5   train_loss = 0.189
+Epoch 828 Batch    0/5   train_loss = 0.187
+Epoch 830 Batch    0/5   train_loss = 0.186
+Epoch 832 Batch    0/5   train_loss = 0.187
+Epoch 834 Batch    0/5   train_loss = 0.189
+Epoch 836 Batch    0/5   train_loss = 0.189
+Epoch 838 Batch    0/5   train_loss = 0.197
+Epoch 840 Batch    0/5   train_loss = 0.207
+Epoch 842 Batch    0/5   train_loss = 0.196
+Epoch 844 Batch    0/5   train_loss = 0.187
+Epoch 846 Batch    0/5   train_loss = 0.197
+Epoch 848 Batch    0/5   train_loss = 0.189
+Epoch 850 Batch    0/5   train_loss = 0.176
+Epoch 852 Batch    0/5   train_loss = 0.171
+Epoch 854 Batch    0/5   train_loss = 0.164
+Epoch 856 Batch    0/5   train_loss = 0.161
+Epoch 858 Batch    0/5   train_loss = 0.157
+Epoch 860 Batch    0/5   train_loss = 0.154
+Epoch 862 Batch    0/5   train_loss = 0.152
+Epoch 864 Batch    0/5   train_loss = 0.150
+Epoch 866 Batch    0/5   train_loss = 0.148
+Epoch 868 Batch    0/5   train_loss = 0.146
+Epoch 870 Batch    0/5   train_loss = 0.145
+Epoch 872 Batch    0/5   train_loss = 0.145
+Epoch 874 Batch    0/5   train_loss = 0.142
+Epoch 876 Batch    0/5   train_loss = 0.143
+Epoch 878 Batch    0/5   train_loss = 0.159
+Epoch 880 Batch    0/5   train_loss = 0.145
+Epoch 882 Batch    0/5   train_loss = 0.161
+Epoch 884 Batch    0/5   train_loss = 0.211
+Epoch 886 Batch    0/5   train_loss = 0.196
+Epoch 888 Batch    0/5   train_loss = 0.335
+Epoch 890 Batch    0/5   train_loss = 0.325
+Epoch 892 Batch    0/5   train_loss = 0.279
+Epoch 894 Batch    0/5   train_loss = 0.244
+Epoch 896 Batch    0/5   train_loss = 0.214
+Epoch 898 Batch    0/5   train_loss = 0.174
+Epoch 900 Batch    0/5   train_loss = 0.147
+Epoch 902 Batch    0/5   train_loss = 0.138
+Epoch 904 Batch    0/5   train_loss = 0.131
+Epoch 906 Batch    0/5   train_loss = 0.128
+Epoch 908 Batch    0/5   train_loss = 0.125
+Epoch 910 Batch    0/5   train_loss = 0.123
+Epoch 912 Batch    0/5   train_loss = 0.121
+Epoch 914 Batch    0/5   train_loss = 0.119
+Epoch 916 Batch    0/5   train_loss = 0.117
+Epoch 918 Batch    0/5   train_loss = 0.116
+Epoch 920 Batch    0/5   train_loss = 0.114
+Epoch 922 Batch    0/5   train_loss = 0.113
+Epoch 924 Batch    0/5   train_loss = 0.112
+Epoch 926 Batch    0/5   train_loss = 0.111
+Epoch 928 Batch    0/5   train_loss = 0.109
+Epoch 930 Batch    0/5   train_loss = 0.108
+Epoch 932 Batch    0/5   train_loss = 0.107
+Epoch 934 Batch    0/5   train_loss = 0.106
+Epoch 936 Batch    0/5   train_loss = 0.105
+Epoch 938 Batch    0/5   train_loss = 0.103
+Epoch 940 Batch    0/5   train_loss = 0.102
+Epoch 942 Batch    0/5   train_loss = 0.101
+Epoch 944 Batch    0/5   train_loss = 0.100
+Epoch 946 Batch    0/5   train_loss = 0.099
+Epoch 948 Batch    0/5   train_loss = 0.098
+Epoch 950 Batch    0/5   train_loss = 0.097
+Epoch 952 Batch    0/5   train_loss = 0.096
+Epoch 954 Batch    0/5   train_loss = 0.095
+Epoch 956 Batch    0/5   train_loss = 0.094
+Epoch 958 Batch    0/5   train_loss = 0.094
+Epoch 960 Batch    0/5   train_loss = 0.093
+Epoch 962 Batch    0/5   train_loss = 0.092
+Epoch 964 Batch    0/5   train_loss = 0.091
+Epoch 966 Batch    0/5   train_loss = 0.090
+Epoch 968 Batch    0/5   train_loss = 0.089
+Epoch 970 Batch    0/5   train_loss = 0.088
+Epoch 972 Batch    0/5   train_loss = 0.088
+Epoch 974 Batch    0/5   train_loss = 0.087
+Epoch 976 Batch    0/5   train_loss = 0.086
+Epoch 978 Batch    0/5   train_loss = 0.085
+Epoch 980 Batch    0/5   train_loss = 0.084
+Epoch 982 Batch    0/5   train_loss = 0.083
+Epoch 984 Batch    0/5   train_loss = 0.083
+Epoch 986 Batch    0/5   train_loss = 0.082
+Epoch 988 Batch    0/5   train_loss = 0.081
+Epoch 990 Batch    0/5   train_loss = 0.080
+Epoch 992 Batch    0/5   train_loss = 0.080
+Epoch 994 Batch    0/5   train_loss = 0.079
+Epoch 996 Batch    0/5   train_loss = 0.078
+Epoch 998 Batch    0/5   train_loss = 0.078
+Model Trained and Saved
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Save Parameters

Save seq_length and save_dir for generating a new TV script.

+ +
+
+
+
+
+
In [198]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+# Save parameters for checkpoint
+helper.save_params((seq_length, save_dir))
+
+ +
+
+
+ +
+
+
+
+
+
+

Checkpoint

+
+
+
+
+
+
In [272]:
+
+
+
"""
+DON'T MODIFY ANYTHING IN THIS CELL
+"""
+import tensorflow as tf
+import numpy as np
+import helper
+import problem_unittests as tests
+
+_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()
+seq_length, load_dir = helper.load_params()
+
+ +
+
+
+ +
+
+
+
+
+
+

Implement Generate Functions

Get Tensors

Get tensors from loaded_graph using the function get_tensor_by_name(). Get the tensors using the following names:

  • "input:0"
  • "initial_state:0"
  • "final_state:0"
  • "probs:0"

Return the tensors in the following tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)

+ +
+
+
+
+
+
In [273]:
+
+
+
def get_tensors(loaded_graph):
+    """
+    Get input, initial state, final state, and probabilities tensor from <loaded_graph>
+    :param loaded_graph: TensorFlow graph loaded from file
+    :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)
+    """
+    
+    t_input = loaded_graph.get_tensor_by_name('input:0')
+    t_initial_state = loaded_graph.get_tensor_by_name('initial_state:0')
+    t_final_state = loaded_graph.get_tensor_by_name('final_state:0')
+    t_probs = loaded_graph.get_tensor_by_name('probs:0')
+    return t_input, t_initial_state, t_final_state, t_probs
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_get_tensors(get_tensors)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Choose Word

Implement the pick_word() function to select the next word using probabilities.
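An editorial note before the implementation: always taking the arg-max word is deterministic and tends to get stuck in loops. A common alternative, sketched here and not part of the original template, is to sample the next word from the predicted distribution:

    def pick_word_sampled(probabilities, int_to_vocab):
        # Sample a word id in proportion to its predicted probability instead of taking the arg max
        word_id = int(np.random.choice(len(probabilities), p=probabilities))
        return int_to_vocab[word_id]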

+ +
+
+
+
+
+
In [274]:
+
+
+
def pick_word(probabilities, int_to_vocab):
+    """
+    Pick the next word in the generated text
+    :param probabilities: Probabilities of the next word
+    :param int_to_vocab: Dictionary of word ids as the keys and words as the values
+    :return: String of the predicted word
+    """
+    
+    word = int_to_vocab[np.argmax(probabilities)]
+    
+    return word
+
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+tests.test_pick_word(pick_word)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
Tests Passed
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Generate TV Script

This will generate the TV script for you. Set gen_length to the length of the TV script you want to generate.

+ +
+
+
+
+
+
In [275]:
+
+
+
gen_length = 200
+# homer_simpson, moe_szyslak, or Barney_Gumble
+prime_word = 'moe_szyslak'
+
+"""
+DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
+"""
+loaded_graph = tf.Graph()
+with tf.Session(graph=loaded_graph) as sess:
+    # Load saved model
+    loader = tf.train.import_meta_graph(load_dir + '.meta')
+    loader.restore(sess, load_dir)
+
+    # Get Tensors from loaded model
+    input_text, initial_state, final_state, probs = get_tensors(loaded_graph)
+
+    # Sentences generation setup
+    gen_sentences = [prime_word + ':']
+    prev_state = sess.run(initial_state, {input_text: np.array([[1]])})
+
+    # Generate sentences
+    for n in range(gen_length):
+        # Dynamic Input
+        dyn_input = [[vocab_to_int[word] for word in gen_sentences[-seq_length:]]]
+        dyn_seq_length = len(dyn_input[0])
+
+        # Get Prediction
+        probabilities, prev_state = sess.run(
+            [probs, final_state],
+            {input_text: dyn_input, initial_state: prev_state})
+        
+        pred_word = pick_word(probabilities[dyn_seq_length-1], int_to_vocab)
+
+        gen_sentences.append(pred_word)
+    
+    # Remove tokens
+    tv_script = ' '.join(gen_sentences)
+    for key, token in token_dict.items():
+        ending = ' ' if key in ['\n', '(', '"'] else ''
+        tv_script = tv_script.replace(' ' + token.lower(), key)
+    tv_script = tv_script.replace('\n ', '\n')
+    tv_script = tv_script.replace('( ', '(')
+        
+    print(tv_script)
+
+ +
+
+
+ +
+
+ + +
+
+ +
+
moe_szyslak: sizes good-looking slap detective_homer_simpson: takin' cesss planning parrot smoke parrot sizes frustrated choked slap gesture elmo's jerry duff's butterball officials sizes themselves gesture whiny irrelevant paintings continuing huddle tony butterball worst jerry neighborhood slap slap slap detective_homer_simpson: meatpies crooks sail slap slap slap sizes worst mr slap worst gesture parrot calendars bathed schnapps butterball stuck jerry dash my-y-y-y-y-y slap slap slap detective_homer_simpson: rain gesture bashir's jerry longest slap slap slap detective_homer_simpson: realize gesture parrot neighborhood jerry dad's poet presided scrutinizes presided rope neighborhood booth detective_homer_simpson: enjoyed gesture electronic sam: jerry dash my-y-y-y-y-y butterball protestantism dash my-y-y-y-y-y friendly dash happiness agreement slap protestantism muttering muttering sugar-free parrot is: abandon fudd scrutinizes detective_homer_simpson: itself duff's butterball drinker slap muttering shaky slap cuff giant face knockin' tv-station_announcer: that's slap detective_homer_simpson: celebrate rubbed 2nd_voice_on_transmitter: further rubbed usual laramie bunch slap detective_homer_simpson: itself gesture child jerry premise poet sarcastic slap detective_homer_simpson: meatpies skydiving scrutinizes scream renee: scrutinizes detective_homer_simpson: itself lenses butterball tapered smokin' 2nd_voice_on_transmitter: slap detective_homer_simpson: detective_homer_simpson: detective_homer_simpson: aims always butterball oh-so-sophisticated wine dislike sizes bury gang butterball renee: rope laramie themselves beings slap detective_homer_simpson: rain indicates butterball stunned slap detective_homer_simpson: rain arts butterball ratted 2nd_voice_on_transmitter: pepsi oh-so-sophisticated planning booth rope presided rope abandon worst
+
+
+
+ +
+
+ +
+
+
+
+
+
+

The TV Script is Nonsensical

It's okay if the TV script doesn't make any sense. We trained on less than a megabyte of text. In order to get good results, you'll have to use a smaller vocabulary or get more data. Luckily there's more data! As we mentioned at the beginning of this project, this is a subset of another dataset. We didn't have you train on all the data, because that would take too long. However, you are free to train your neural network on all the data. After you complete the project, of course.

+

Submitting This Project

When submitting this project, make sure to run all the cells before saving the notebook. Save the notebook file as "dlnd_tv_script_generation.ipynb" and save it as an HTML file under "File" -> "Download as". Include the "helper.py" and "problem_unittests.py" files in your submission.

+ +
+
+
+
+
+ + + + + + diff --git a/dlnd_tv_script_generation.ipynb b/dlnd_tv_script_generation.ipynb new file mode 100644 index 0000000..9a36bd0 --- /dev/null +++ b/dlnd_tv_script_generation.ipynb @@ -0,0 +1,1809 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# TV Script Generation\n", + "In this project, you'll generate your own [Simpsons](https://en.wikipedia.org/wiki/The_Simpsons) TV scripts using RNNs. You'll be using part of the [Simpsons dataset](https://www.kaggle.com/wcukierski/the-simpsons-by-the-data) of scripts from 27 seasons. The Neural Network you'll build will generate a new TV script for a scene at [Moe's Tavern](https://simpsonswiki.com/wiki/Moe's_Tavern).\n", + "## Get the Data\n", + "The data is already provided for you. You'll be using a subset of the original dataset. It consists of only the scenes in Moe's Tavern. This doesn't include other versions of the tavern, like \"Moe's Cavern\", \"Flaming Moe's\", \"Uncle Moe's Family Feed-Bag\", etc.." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import helper\n", + "\n", + "data_dir = './data/simpsons/moes_tavern_lines.txt'\n", + "text = helper.load_data(data_dir)\n", + "# Ignore notice, since we don't use it for analysing the data\n", + "text = text[81:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Explore the Data\n", + "Play around with `view_sentence_range` to view different parts of the data." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset Stats\n", + "Roughly the number of unique words: 11492\n", + "Number of scenes: 262\n", + "Average number of sentences in each scene: 15.248091603053435\n", + "Number of lines: 4257\n", + "Average number of words in each line: 11.50434578341555\n", + "\n", + "The sentences 0 to 10:\n", + "Moe_Szyslak: (INTO PHONE) Moe's Tavern. Where the elite meet to drink.\n", + "Bart_Simpson: Eh, yeah, hello, is Mike there? Last name, Rotch.\n", + "Moe_Szyslak: (INTO PHONE) Hold on, I'll check. (TO BARFLIES) Mike Rotch. Mike Rotch. Hey, has anybody seen Mike Rotch, lately?\n", + "Moe_Szyslak: (INTO PHONE) Listen you little puke. One of these days I'm gonna catch you, and I'm gonna carve my name on your back with an ice pick.\n", + "Moe_Szyslak: What's the matter Homer? You're not your normal effervescent self.\n", + "Homer_Simpson: I got my problems, Moe. 
Give me another one.\n", + "Moe_Szyslak: Homer, hey, you should not drink to forget your problems.\n", + "Barney_Gumble: Yeah, you should only drink to enhance your social skills.\n", + "\n", + "\n" + ] + } + ], + "source": [ + "view_sentence_range = (0, 10)\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import numpy as np\n", + "\n", + "print('Dataset Stats')\n", + "print('Roughly the number of unique words: {}'.format(len({word: None for word in text.split()})))\n", + "scenes = text.split('\\n\\n')\n", + "print('Number of scenes: {}'.format(len(scenes)))\n", + "sentence_count_scene = [scene.count('\\n') for scene in scenes]\n", + "print('Average number of sentences in each scene: {}'.format(np.average(sentence_count_scene)))\n", + "\n", + "sentences = [sentence for scene in scenes for sentence in scene.split('\\n')]\n", + "print('Number of lines: {}'.format(len(sentences)))\n", + "word_count_sentence = [len(sentence.split()) for sentence in sentences]\n", + "print('Average number of words in each line: {}'.format(np.average(word_count_sentence)))\n", + "\n", + "print()\n", + "print('The sentences {} to {}:'.format(*view_sentence_range))\n", + "print('\\n'.join(text.split('\\n')[view_sentence_range[0]:view_sentence_range[1]]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Implement Preprocessing Functions\n", + "The first thing to do to any dataset is preprocessing. Implement the following preprocessing functions below:\n", + "- Lookup Table\n", + "- Tokenize Punctuation\n", + "\n", + "### Lookup Table\n", + "To create a word embedding, you first need to transform the words to ids. In this function, create two dictionaries:\n", + "- Dictionary to go from the words to an id, we'll call `vocab_to_int`\n", + "- Dictionary to go from the id to word, we'll call `int_to_vocab`\n", + "\n", + "Return these dictionaries in the following tuple `(vocab_to_int, int_to_vocab)`" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import problem_unittests as tests\n", + "\n", + "def create_lookup_tables(text):\n", + " \"\"\"\n", + " Create lookup tables for vocabulary\n", + " :param text: The text of tv scripts split into words\n", + " :return: A tuple of dicts (vocab_to_int, int_to_vocab)\n", + " \"\"\"\n", + " vocab = set(text)\n", + " \n", + " vocab_to_int = {word: index for index, word in enumerate(vocab)}\n", + " int_to_vocab = {index: word for (word, index) in vocab_to_int.items()}\n", + " \n", + " return vocab_to_int, int_to_vocab\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_create_lookup_tables(create_lookup_tables)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Tokenize Punctuation\n", + "We'll be splitting the script into a word array using spaces as delimiters. However, punctuations like periods and exclamation marks make it hard for the neural network to distinguish between the word \"bye\" and \"bye!\".\n", + "\n", + "Implement the function `token_lookup` to return a dict that will be used to tokenize symbols like \"!\" into \"||Exclamation_Mark||\". 
Create a dictionary for the following symbols where the symbol is the key and value is the token:\n", + "- Period ( . )\n", + "- Comma ( , )\n", + "- Quotation Mark ( \" )\n", + "- Semicolon ( ; )\n", + "- Exclamation mark ( ! )\n", + "- Question mark ( ? )\n", + "- Left Parentheses ( ( )\n", + "- Right Parentheses ( ) )\n", + "- Dash ( -- )\n", + "- Return ( \\n )\n", + "\n", + "This dictionary will be used to token the symbols and add the delimiter (space) around it. This separates the symbols as it's own word, making it easier for the neural network to predict on the next word. Make sure you don't use a token that could be confused as a word. Instead of using the token \"dash\", try using something like \"||dash||\"." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def token_lookup():\n", + " \"\"\"\n", + " Generate a dict to turn punctuation into a token.\n", + " :return: Tokenize dictionary where the key is the punctuation and the value is the token\n", + " \"\"\"\n", + " \n", + " return {\n", + " '.': '||period||',\n", + " ',': '||comma||',\n", + " '\"': '||quotation_mark||',\n", + " ';': '||semicolon||',\n", + " '!': '||exclamation_mark||',\n", + " '?': '||question_mark||',\n", + " '(': '||left_parentheses',\n", + " ')': '||right_parentheses',\n", + " '--': '||dash||',\n", + " '\\n': '||return||'\n", + " }\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_tokenize(token_lookup)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Preprocess all the data and save it\n", + "Running the code cell below will preprocess all the data and save it to file." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "# Preprocess Training, Validation, and Testing Data\n", + "helper.preprocess_and_save_data(data_dir, token_lookup, create_lookup_tables)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Check Point\n", + "This is your first checkpoint. If you ever decide to come back to this notebook or have to restart the notebook, you can start from here. The preprocessed data has been saved to disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import helper\n", + "import numpy as np\n", + "import problem_unittests as tests\n", + "\n", + "int_text, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Extra hyper parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "from collections import namedtuple\n", + "\n", + "hyper_params = (('embedding_size', 128),\n", + " ('lstm_layers', 2),\n", + " ('keep_prob', 0.7)\n", + " )\n", + "\n", + "\n", + "\n", + "\n", + "Hyper = namedtuple('Hyper', map(lambda x: x[0], hyper_params))\n", + "HYPER = Hyper(*list(map(lambda x: x[1], hyper_params)))\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Build the Neural Network\n", + "You'll build the components necessary to build a RNN by implementing the following functions below:\n", + "- get_inputs\n", + "- get_init_cell\n", + "- get_embed\n", + "- build_rnn\n", + "- build_nn\n", + "- get_batches\n", + "\n", + "### Check the Version of TensorFlow and Access to GPU" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TensorFlow Version: 1.0.0\n", + "Default GPU Device: /gpu:0\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "from distutils.version import LooseVersion\n", + "import warnings\n", + "import tensorflow as tf\n", + "\n", + "# Check TensorFlow Version\n", + "assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer'\n", + "print('TensorFlow Version: {}'.format(tf.__version__))\n", + "\n", + "# Check for a GPU\n", + "if not tf.test.gpu_device_name():\n", + " warnings.warn('No GPU found. Please use a GPU to train your neural network.')\n", + "else:\n", + " print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Input\n", + "Implement the `get_inputs()` function to create TF Placeholders for the Neural Network. 
It should create the following placeholders:\n", + "- Input text placeholder named \"input\" using the [TF Placeholder](https://www.tensorflow.org/api_docs/python/tf/placeholder) `name` parameter.\n", + "- Targets placeholder\n", + "- Learning Rate placeholder\n", + "\n", + "Return the placeholders in the following the tuple `(Input, Targets, LearingRate)`" + ] + }, + { + "cell_type": "code", + "execution_count": 225, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_inputs():\n", + " \"\"\"\n", + " Create TF Placeholders for input, targets, and learning rate.\n", + " :return: Tuple (input, targets, learning rate)\n", + " \"\"\"\n", + " \n", + " # We use shape [None, None] to feed any batch size and any sequence length\n", + " input_placeholder = tf.placeholder(tf.int64, [None, None],name='input')\n", + " \n", + " # Targets are [batch_size, seq_length]\n", + " targets_placeholder = tf.placeholder(tf.int64, [None, None], name='targets') \n", + " \n", + " \n", + " learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')\n", + " return input_placeholder, targets_placeholder, learning_rate_placeholder\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_inputs(get_inputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build RNN Cell and Initialize\n", + "Stack one or more [`BasicLSTMCells`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/BasicLSTMCell) in a [`MultiRNNCell`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/MultiRNNCell).\n", + "- The Rnn size should be set using `rnn_size`\n", + "- Initalize Cell State using the MultiRNNCell's [`zero_state()`](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/MultiRNNCell#zero_state) function\n", + " - Apply the name \"initial_state\" to the initial state using [`tf.identity()`](https://www.tensorflow.org/api_docs/python/tf/identity)\n", + "\n", + "Return the cell and initial state in the following tuple `(Cell, InitialState)`" + ] + }, + { + "cell_type": "code", + "execution_count": 227, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_init_cell(batch_size, rnn_size):\n", + " \"\"\"\n", + " Create an RNN Cell and initialize it.\n", + " :param batch_size: Size of batches\n", + " :param rnn_size: Size of RNNs\n", + " :return: Tuple (cell, initialize state)\n", + " \"\"\"\n", + " with tf.name_scope('RNN_layers'):\n", + " lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n", + "\n", + " # add a dropout wrapper\n", + " drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)\n", + "\n", + " #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)\n", + "\n", + " cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)\n", + " \n", + " \n", + " _initial_state = cell.zero_state(batch_size, tf.float32)\n", + " initial_state = tf.identity(_initial_state, name='initial_state')\n", + " \n", + " return cell, initial_state\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_init_cell(get_init_cell)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Word Embedding\n", + "Apply embedding to `input_data` using TensorFlow. Return the embedded sequence." + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_embed(input_data, vocab_size, embed_dim):\n", + " \"\"\"\n", + " Create embedding for .\n", + " :param input_data: TF placeholder for text input.\n", + " :param vocab_size: Number of words in vocabulary.\n", + " :param embed_dim: Number of embedding dimensions\n", + " :return: Embedded input.\n", + " \"\"\"\n", + " with tf.name_scope('Embedding'):\n", + " embeddings = tf.Variable(\n", + " tf.random_uniform([vocab_size, embed_dim], -1.0, 1.0)\n", + " )\n", + "\n", + " embed = tf.nn.embedding_lookup(embeddings, input_data)\n", + " \n", + " return embed\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_embed(get_embed)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build RNN\n", + "You created a RNN Cell in the `get_init_cell()` function. Time to use the cell to create a RNN.\n", + "- Build the RNN using the [`tf.nn.dynamic_rnn()`](https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn)\n", + " - Apply the name \"final_state\" to the final state using [`tf.identity()`](https://www.tensorflow.org/api_docs/python/tf/identity)\n", + "\n", + "Return the outputs and final_state state in the following tuple `(Outputs, FinalState)` " + ] + }, + { + "cell_type": "code", + "execution_count": 228, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def build_rnn(cell, inputs):\n", + " \"\"\"\n", + " Create a RNN using a RNN Cell\n", + " :param cell: RNN Cell\n", + " :param inputs: Input text data\n", + " :return: Tuple (Outputs, Final State)\n", + " \"\"\"\n", + " ## NOTES\n", + " # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] 
time_major==false (default)\n", + " with tf.name_scope('RNN_output'):\n", + " outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)\n", + " \n", + " final_state = tf.identity(final_state, name='final_state')\n", + " \n", + " \n", + " return outputs, final_state\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_build_rnn(build_rnn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build the Neural Network\n", + "Apply the functions you implemented above to:\n", + "- Apply embedding to `input_data` using your `get_embed(input_data, vocab_size, embed_dim)` function.\n", + "- Build RNN using `cell` and your `build_rnn(cell, inputs)` function.\n", + "- Apply a fully connected layer with a linear activation and `vocab_size` as the number of outputs.\n", + "\n", + "Return the logits and final state in the following tuple (Logits, FinalState) " + ] + }, + { + "cell_type": "code", + "execution_count": 231, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "logits after reshape: Tensor(\"logits_reshape_to_loss/logits:0\", shape=(128, 5, 27), dtype=float32)\n", + "Tests Passed\n" + ] + } + ], + "source": [ + "def build_nn(cell, rnn_size, input_data, vocab_size):\n", + " \"\"\"\n", + " Build part of the neural network\n", + " :param cell: RNN cell\n", + " :param rnn_size: Size of rnns\n", + " :param input_data: Input data\n", + " :param vocab_size: Vocabulary size\n", + " :return: Tuple (Logits, FinalState)\n", + " \"\"\"\n", + " \n", + " num_outputs = vocab_size\n", + " \n", + " \n", + " ## Not sure why the unit test was made without taking into \n", + " # account we are handling dynamic tensor shape that we need to infer\n", + " # at runtime, so I made an if statement just to pass the test case\n", + " #\n", + " # Some references: https://goo.gl/vD3egn\n", + " # https://goo.gl/E8vT2M \n", + " \n", + " if input_data.get_shape().as_list()[1] is not None:\n", + " batch_size = input_data.get_shape().as_list()[0]\n", + " seq_len = input_data.get_shape().as_list()[1]\n", + " \n", + " # Infer dynamic tensor shape of input\n", + " else:\n", + " input_dims = tf.shape(input_data)\n", + " batch_size = input_dims[0]\n", + " seq_len = input_dims[1]\n", + "\n", + " \n", + "\n", + " \n", + " embed = get_embed(input_data, vocab_size, HYPER.embedding_size)\n", + " \n", + " \n", + " ## NOTES\n", + " # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] 
see: time_major==false (default)\n", + " \n", + " ## Output shape\n", + " ## [batch_size, time_step, rnn_size]\n", + " raw_rnn_outputs, final_state = build_rnn(cell, embed)\n", + " \n", + " \n", + " # Put outputs in rows\n", + " # make the output into [batch_size*time_step, rnn_size] for easy matmul\n", + " with tf.name_scope('sequence_reshape'):\n", + " outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size], name='rnn_output')\n", + " \n", + " \n", + " # Question: why are we using a linear activation and not softmax?\n", + " # My guess: because seq2seq.sequence_loss has an efficient way to calculate the loss directly from logits \n", + " with tf.name_scope('logits'):\n", + " \n", + " linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.05), name='linear_w')\n", + " linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')\n", + "\n", + " logits = tf.matmul(outputs, linear_w) + linear_b\n", + " \n", + " \n", + " \n", + " # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]\n", + " # We do this because the loss function seq2seq.sequence_loss takes as logits a shape of [batch_size, seq_len, num_decoded_symbols]\n", + " with tf.name_scope('logits_reshape_to_loss'):\n", + " logits = tf.reshape(logits, [batch_size, seq_len, num_outputs], name='logits')\n", + " print('logits after reshape: ', logits)\n", + " \n", + " return logits, final_state\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_build_nn(build_nn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Batches\n", + "Implement `get_batches` to create batches of input and targets using `int_text`. The batches should be a Numpy array with the shape `(number of batches, 2, batch size, sequence length)`. 
Each batch contains two elements:\n", + "- The first element is a single batch of **input** with the shape `[batch size, sequence length]`\n", + "- The second element is a single batch of **targets** with the shape `[batch size, sequence length]`\n", + "\n", + "If you can't fill the last batch with enough data, drop the last batch.\n", + "\n", + "For example, `get_batches([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], 2, 3)` would return a Numpy array of the following:\n", + "```\n", + "[\n", + " # First Batch\n", + " [\n", + " # Batch of Input\n", + " [[ 1 2 3], [ 7 8 9]],\n", + " # Batch of targets\n", + " [[ 2 3 4], [ 8 9 10]]\n", + " ],\n", + " \n", + " # Second Batch\n", + " [\n", + " # Batch of Input\n", + " [[ 4 5 6], [10 11 12]],\n", + " # Batch of targets\n", + " [[ 5 6 7], [11 12 13]]\n", + " ]\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_batches(int_text, batch_size, seq_length):\n", + " \"\"\"\n", + " Return batches of input and target\n", + " :param int_text: Text with the words replaced by their ids\n", + " :param batch_size: The size of batch\n", + " :param seq_length: The length of sequence\n", + " :return: Batches as a Numpy array\n", + " \"\"\"\n", + " \n", + " slice_size = batch_size * seq_length\n", + " n_batches = int(len(int_text)/slice_size)\n", + " \n", + " # input part\n", + " _inputs = np.array(int_text[:n_batches*slice_size])\n", + " \n", + " # target part\n", + " _targets = np.array(int_text[1:n_batches*slice_size + 1])\n", + " \n", + "\n", + " # Split the inputs and targets into n_batches slices of batch_size*seq_len items each\n", + " # [batch, batch, ...]\n", + " inputs, targets = np.split(_inputs, n_batches), np.split(_targets, n_batches)\n", + " \n", + " # concat inputs and targets\n", + " batches = np.c_[inputs, targets]\n", + " #print(batches.shape)\n", + " \n", + " # Reshape into final batches output\n", + " batches = batches.reshape((-1, 2, batch_size, seq_length))\n", + "\n", + " #print(batches[0][1])\n", + "\n", + " \n", + " return batches\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_batches(get_batches)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Neural Network Training\n", + "### Hyperparameters\n", + "Tune the following parameters:\n", + "\n", + "- Set `num_epochs` to the number of epochs.\n", + "- Set `batch_size` to the batch size.\n", + "- Set `rnn_size` to the size of the RNNs.\n", + "- Set `seq_length` to the length of sequence.\n", + "- Set `learning_rate` to the learning rate.\n", + "- Set `show_every_n_batches` to the number of batches the neural network should print progress." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 234, + "metadata": { + "collapsed": true, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# Number of Epochs\n", + "num_epochs = 1000\n", + "# Batch Size\n", + "batch_size = 128\n", + "# RNN Size\n", + "rnn_size = 70\n", + "# Sequence Length\n", + "seq_length = 100\n", + "# Learning Rate\n", + "learning_rate = 1e-3\n", + "# Show stats for every n number of batches\n", + "show_every_n_batches = 10\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "save_dir = './save'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Build the Graph\n", + "Build the graph using the neural network you implemented." + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "logits after reshape: Tensor(\"logits_reshape_to_loss/logits:0\", shape=(?, ?, 6779), dtype=float32)\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "from tensorflow.contrib import seq2seq\n", + "\n", + "train_graph = tf.Graph()\n", + "with train_graph.as_default():\n", + " vocab_size = len(int_to_vocab)\n", + " input_text, targets, lr = get_inputs()\n", + " input_data_shape = tf.shape(input_text)\n", + " cell, initial_state = get_init_cell(input_data_shape[0], rnn_size)\n", + " logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size)\n", + "\n", + " # Probabilities for generating words\n", + " probs = tf.nn.softmax(logits, name='probs')\n", + "\n", + " # Loss function\n", + " cost = seq2seq.sequence_loss(\n", + " logits,\n", + " targets,\n", + " tf.ones([input_data_shape[0], input_data_shape[1]]))\n", + "\n", + " # Optimizer\n", + " optimizer = tf.train.AdamOptimizer(lr)\n", + "\n", + " # Gradient Clipping\n", + " gradients = optimizer.compute_gradients(cost)\n", + " capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]\n", + " train_op = optimizer.apply_gradients(capped_gradients)" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "# Write out the graph for TensorBoard\n", + "\n", + "with tf.Session(graph=train_graph) as sess:\n", + " file_writer = tf.summary.FileWriter('./logs/1', sess.graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Train\n", + "Train the neural network on the preprocessed data. If you have a hard time getting a good loss, check the [forums](https://discussions.udacity.com/) to see if anyone is having the same problem." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 197, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n", + "Epoch 0 Batch 0/5 train_loss = 8.825\n", + "Epoch 2 Batch 0/5 train_loss = 6.441\n", + "Epoch 4 Batch 0/5 train_loss = 6.023\n", + "Epoch 6 Batch 0/5 train_loss = 5.927\n", + "Epoch 8 Batch 0/5 train_loss = 5.903\n", + "Epoch 10 Batch 0/5 train_loss = 5.883\n", + "Epoch 12 Batch 0/5 train_loss = 5.874\n", + "Epoch 14 Batch 0/5 train_loss = 5.858\n", + "Epoch 16 Batch 0/5 train_loss = 5.833\n", + "Epoch 18 Batch 0/5 train_loss = 5.794\n", + "Epoch 20 Batch 0/5 train_loss = 5.739\n", + "Epoch 22 Batch 0/5 train_loss = 5.682\n", + "Epoch 24 Batch 0/5 train_loss = 5.626\n", + "Epoch 26 Batch 0/5 train_loss = 5.572\n", + "Epoch 28 Batch 0/5 train_loss = 5.521\n", + "Epoch 30 Batch 0/5 train_loss = 5.471\n", + "Epoch 32 Batch 0/5 train_loss = 5.421\n", + "Epoch 34 Batch 0/5 train_loss = 5.365\n", + "Epoch 36 Batch 0/5 train_loss = 5.304\n", + "Epoch 38 Batch 0/5 train_loss = 5.244\n", + "Epoch 40 Batch 0/5 train_loss = 5.185\n", + "Epoch 42 Batch 0/5 train_loss = 5.124\n", + "Epoch 44 Batch 0/5 train_loss = 5.063\n", + "Epoch 46 Batch 0/5 train_loss = 5.003\n", + "Epoch 48 Batch 0/5 train_loss = 4.945\n", + "Epoch 50 Batch 0/5 train_loss = 4.891\n", + "Epoch 52 Batch 0/5 train_loss = 4.841\n", + "Epoch 54 Batch 0/5 train_loss = 4.794\n", + "Epoch 56 Batch 0/5 train_loss = 4.751\n", + "Epoch 58 Batch 0/5 train_loss = 4.710\n", + "Epoch 60 Batch 0/5 train_loss = 4.669\n", + "Epoch 62 Batch 0/5 train_loss = 4.638\n", + "Epoch 64 Batch 0/5 train_loss = 4.638\n", + "Epoch 66 Batch 0/5 train_loss = 4.589\n", + "Epoch 68 Batch 0/5 train_loss = 4.537\n", + "Epoch 70 Batch 0/5 train_loss = 4.501\n", + "Epoch 72 Batch 0/5 train_loss = 4.469\n", + "Epoch 74 Batch 0/5 train_loss = 4.436\n", + "Epoch 76 Batch 0/5 train_loss = 4.405\n", + "Epoch 78 Batch 0/5 train_loss = 4.375\n", + "Epoch 80 Batch 0/5 train_loss = 4.344\n", + "Epoch 82 Batch 0/5 train_loss = 4.363\n", + "Epoch 84 Batch 0/5 train_loss = 4.311\n", + "Epoch 86 Batch 0/5 train_loss = 4.274\n", + "Epoch 88 Batch 0/5 train_loss = 4.240\n", + "Epoch 90 Batch 0/5 train_loss = 4.211\n", + "Epoch 92 Batch 0/5 train_loss = 4.182\n", + "Epoch 94 Batch 0/5 train_loss = 4.155\n", + "Epoch 96 Batch 0/5 train_loss = 4.135\n", + "Epoch 98 Batch 0/5 train_loss = 4.107\n", + "Epoch 100 Batch 0/5 train_loss = 4.093\n", + "Epoch 102 Batch 0/5 train_loss = 4.053\n", + "Epoch 104 Batch 0/5 train_loss = 4.030\n", + "Epoch 106 Batch 0/5 train_loss = 4.002\n", + "Epoch 108 Batch 0/5 train_loss = 3.978\n", + "Epoch 110 Batch 0/5 train_loss = 3.951\n", + "Epoch 112 Batch 0/5 train_loss = 3.928\n", + "Epoch 114 Batch 0/5 train_loss = 3.902\n", + "Epoch 116 Batch 0/5 train_loss = 3.884\n", + "Epoch 118 Batch 0/5 train_loss = 3.862\n", + "Epoch 120 Batch 0/5 train_loss = 3.840\n", + "Epoch 122 Batch 0/5 train_loss = 3.814\n", + "Epoch 124 Batch 0/5 train_loss = 3.803\n", + "Epoch 126 Batch 0/5 train_loss = 3.775\n", + "Epoch 128 Batch 0/5 train_loss = 3.738\n", + "Epoch 130 Batch 0/5 train_loss = 3.714\n", + "Epoch 132 Batch 0/5 train_loss = 3.690\n", + "Epoch 134 Batch 0/5 train_loss = 3.665\n", + "Epoch 136 Batch 0/5 train_loss = 3.642\n", + "Epoch 138 Batch 0/5 train_loss = 3.619\n", + "Epoch 140 Batch 0/5 train_loss = 3.596\n", + "Epoch 142 Batch 0/5 train_loss = 3.577\n", + "Epoch 144 Batch 0/5 train_loss = 3.588\n", + "Epoch 146 Batch 
0/5 train_loss = 3.561\n", + "Epoch 148 Batch 0/5 train_loss = 3.537\n", + "Epoch 150 Batch 0/5 train_loss = 3.494\n", + "Epoch 152 Batch 0/5 train_loss = 3.475\n", + "Epoch 154 Batch 0/5 train_loss = 3.444\n", + "Epoch 156 Batch 0/5 train_loss = 3.431\n", + "Epoch 158 Batch 0/5 train_loss = 3.403\n", + "Epoch 160 Batch 0/5 train_loss = 3.393\n", + "Epoch 162 Batch 0/5 train_loss = 3.371\n", + "Epoch 164 Batch 0/5 train_loss = 3.352\n", + "Epoch 166 Batch 0/5 train_loss = 3.323\n", + "Epoch 168 Batch 0/5 train_loss = 3.328\n", + "Epoch 170 Batch 0/5 train_loss = 3.281\n", + "Epoch 172 Batch 0/5 train_loss = 3.261\n", + "Epoch 174 Batch 0/5 train_loss = 3.238\n", + "Epoch 176 Batch 0/5 train_loss = 3.216\n", + "Epoch 178 Batch 0/5 train_loss = 3.197\n", + "Epoch 180 Batch 0/5 train_loss = 3.172\n", + "Epoch 182 Batch 0/5 train_loss = 3.169\n", + "Epoch 184 Batch 0/5 train_loss = 3.140\n", + "Epoch 186 Batch 0/5 train_loss = 3.136\n", + "Epoch 188 Batch 0/5 train_loss = 3.145\n", + "Epoch 190 Batch 0/5 train_loss = 3.106\n", + "Epoch 192 Batch 0/5 train_loss = 3.069\n", + "Epoch 194 Batch 0/5 train_loss = 3.038\n", + "Epoch 196 Batch 0/5 train_loss = 3.019\n", + "Epoch 198 Batch 0/5 train_loss = 2.995\n", + "Epoch 200 Batch 0/5 train_loss = 2.979\n", + "Epoch 202 Batch 0/5 train_loss = 2.960\n", + "Epoch 204 Batch 0/5 train_loss = 2.943\n", + "Epoch 206 Batch 0/5 train_loss = 2.963\n", + "Epoch 208 Batch 0/5 train_loss = 2.917\n", + "Epoch 210 Batch 0/5 train_loss = 2.898\n", + "Epoch 212 Batch 0/5 train_loss = 2.867\n", + "Epoch 214 Batch 0/5 train_loss = 2.863\n", + "Epoch 216 Batch 0/5 train_loss = 2.834\n", + "Epoch 218 Batch 0/5 train_loss = 2.809\n", + "Epoch 220 Batch 0/5 train_loss = 2.797\n", + "Epoch 222 Batch 0/5 train_loss = 2.774\n", + "Epoch 224 Batch 0/5 train_loss = 2.759\n", + "Epoch 226 Batch 0/5 train_loss = 2.732\n", + "Epoch 228 Batch 0/5 train_loss = 2.742\n", + "Epoch 230 Batch 0/5 train_loss = 2.704\n", + "Epoch 232 Batch 0/5 train_loss = 2.703\n", + "Epoch 234 Batch 0/5 train_loss = 2.663\n", + "Epoch 236 Batch 0/5 train_loss = 2.672\n", + "Epoch 238 Batch 0/5 train_loss = 2.638\n", + "Epoch 240 Batch 0/5 train_loss = 2.620\n", + "Epoch 242 Batch 0/5 train_loss = 2.595\n", + "Epoch 244 Batch 0/5 train_loss = 2.585\n", + "Epoch 246 Batch 0/5 train_loss = 2.563\n", + "Epoch 248 Batch 0/5 train_loss = 2.539\n", + "Epoch 250 Batch 0/5 train_loss = 2.534\n", + "Epoch 252 Batch 0/5 train_loss = 2.517\n", + "Epoch 254 Batch 0/5 train_loss = 2.497\n", + "Epoch 256 Batch 0/5 train_loss = 2.475\n", + "Epoch 258 Batch 0/5 train_loss = 2.463\n", + "Epoch 260 Batch 0/5 train_loss = 2.478\n", + "Epoch 262 Batch 0/5 train_loss = 2.450\n", + "Epoch 264 Batch 0/5 train_loss = 2.436\n", + "Epoch 266 Batch 0/5 train_loss = 2.417\n", + "Epoch 268 Batch 0/5 train_loss = 2.384\n", + "Epoch 270 Batch 0/5 train_loss = 2.363\n", + "Epoch 272 Batch 0/5 train_loss = 2.340\n", + "Epoch 274 Batch 0/5 train_loss = 2.323\n", + "Epoch 276 Batch 0/5 train_loss = 2.314\n", + "Epoch 278 Batch 0/5 train_loss = 2.302\n", + "Epoch 280 Batch 0/5 train_loss = 2.300\n", + "Epoch 282 Batch 0/5 train_loss = 2.300\n", + "Epoch 284 Batch 0/5 train_loss = 2.283\n", + "Epoch 286 Batch 0/5 train_loss = 2.246\n", + "Epoch 288 Batch 0/5 train_loss = 2.246\n", + "Epoch 290 Batch 0/5 train_loss = 2.210\n", + "Epoch 292 Batch 0/5 train_loss = 2.203\n", + "Epoch 294 Batch 0/5 train_loss = 2.185\n", + "Epoch 296 Batch 0/5 train_loss = 2.170\n", + "Epoch 298 Batch 0/5 train_loss = 2.150\n", + "Epoch 300 Batch 0/5 
train_loss = 2.130\n", + "Epoch 302 Batch 0/5 train_loss = 2.132\n", + "Epoch 304 Batch 0/5 train_loss = 2.113\n", + "Epoch 306 Batch 0/5 train_loss = 2.083\n", + "Epoch 308 Batch 0/5 train_loss = 2.073\n", + "Epoch 310 Batch 0/5 train_loss = 2.060\n", + "Epoch 312 Batch 0/5 train_loss = 2.072\n", + "Epoch 314 Batch 0/5 train_loss = 2.081\n", + "Epoch 316 Batch 0/5 train_loss = 2.031\n", + "Epoch 318 Batch 0/5 train_loss = 2.007\n", + "Epoch 320 Batch 0/5 train_loss = 2.001\n", + "Epoch 322 Batch 0/5 train_loss = 1.987\n", + "Epoch 324 Batch 0/5 train_loss = 1.978\n", + "Epoch 326 Batch 0/5 train_loss = 1.963\n", + "Epoch 328 Batch 0/5 train_loss = 1.952\n", + "Epoch 330 Batch 0/5 train_loss = 1.932\n", + "Epoch 332 Batch 0/5 train_loss = 1.918\n", + "Epoch 334 Batch 0/5 train_loss = 1.898\n", + "Epoch 336 Batch 0/5 train_loss = 1.885\n", + "Epoch 338 Batch 0/5 train_loss = 1.872\n", + "Epoch 340 Batch 0/5 train_loss = 1.864\n", + "Epoch 342 Batch 0/5 train_loss = 1.867\n", + "Epoch 344 Batch 0/5 train_loss = 1.848\n", + "Epoch 346 Batch 0/5 train_loss = 1.821\n", + "Epoch 348 Batch 0/5 train_loss = 1.814\n", + "Epoch 350 Batch 0/5 train_loss = 1.788\n", + "Epoch 352 Batch 0/5 train_loss = 1.806\n", + "Epoch 354 Batch 0/5 train_loss = 1.790\n", + "Epoch 356 Batch 0/5 train_loss = 1.761\n", + "Epoch 358 Batch 0/5 train_loss = 1.745\n", + "Epoch 360 Batch 0/5 train_loss = 1.735\n", + "Epoch 362 Batch 0/5 train_loss = 1.718\n", + "Epoch 364 Batch 0/5 train_loss = 1.747\n", + "Epoch 366 Batch 0/5 train_loss = 1.726\n", + "Epoch 368 Batch 0/5 train_loss = 1.753\n", + "Epoch 370 Batch 0/5 train_loss = 1.703\n", + "Epoch 372 Batch 0/5 train_loss = 1.662\n", + "Epoch 374 Batch 0/5 train_loss = 1.643\n", + "Epoch 376 Batch 0/5 train_loss = 1.624\n", + "Epoch 378 Batch 0/5 train_loss = 1.617\n", + "Epoch 380 Batch 0/5 train_loss = 1.598\n", + "Epoch 382 Batch 0/5 train_loss = 1.613\n", + "Epoch 384 Batch 0/5 train_loss = 1.601\n", + "Epoch 386 Batch 0/5 train_loss = 1.584\n", + "Epoch 388 Batch 0/5 train_loss = 1.569\n", + "Epoch 390 Batch 0/5 train_loss = 1.557\n", + "Epoch 392 Batch 0/5 train_loss = 1.534\n", + "Epoch 394 Batch 0/5 train_loss = 1.534\n", + "Epoch 396 Batch 0/5 train_loss = 1.520\n", + "Epoch 398 Batch 0/5 train_loss = 1.547\n", + "Epoch 400 Batch 0/5 train_loss = 1.545\n", + "Epoch 402 Batch 0/5 train_loss = 1.521\n", + "Epoch 404 Batch 0/5 train_loss = 1.486\n", + "Epoch 406 Batch 0/5 train_loss = 1.469\n", + "Epoch 408 Batch 0/5 train_loss = 1.458\n", + "Epoch 410 Batch 0/5 train_loss = 1.442\n", + "Epoch 412 Batch 0/5 train_loss = 1.431\n", + "Epoch 414 Batch 0/5 train_loss = 1.410\n", + "Epoch 416 Batch 0/5 train_loss = 1.411\n", + "Epoch 418 Batch 0/5 train_loss = 1.412\n", + "Epoch 420 Batch 0/5 train_loss = 1.398\n", + "Epoch 422 Batch 0/5 train_loss = 1.417\n", + "Epoch 424 Batch 0/5 train_loss = 1.381\n", + "Epoch 426 Batch 0/5 train_loss = 1.355\n", + "Epoch 428 Batch 0/5 train_loss = 1.354\n", + "Epoch 430 Batch 0/5 train_loss = 1.338\n", + "Epoch 432 Batch 0/5 train_loss = 1.321\n", + "Epoch 434 Batch 0/5 train_loss = 1.326\n", + "Epoch 436 Batch 0/5 train_loss = 1.324\n", + "Epoch 438 Batch 0/5 train_loss = 1.314\n", + "Epoch 440 Batch 0/5 train_loss = 1.292\n", + "Epoch 442 Batch 0/5 train_loss = 1.279\n", + "Epoch 444 Batch 0/5 train_loss = 1.259\n", + "Epoch 446 Batch 0/5 train_loss = 1.283\n", + "Epoch 448 Batch 0/5 train_loss = 1.274\n", + "Epoch 450 Batch 0/5 train_loss = 1.251\n", + "Epoch 452 Batch 0/5 train_loss = 1.279\n", + "Epoch 454 Batch 0/5 train_loss = 
1.249\n", + "Epoch 456 Batch 0/5 train_loss = 1.214\n", + "Epoch 458 Batch 0/5 train_loss = 1.196\n", + "Epoch 460 Batch 0/5 train_loss = 1.185\n", + "Epoch 462 Batch 0/5 train_loss = 1.174\n", + "Epoch 464 Batch 0/5 train_loss = 1.158\n", + "Epoch 466 Batch 0/5 train_loss = 1.195\n", + "Epoch 468 Batch 0/5 train_loss = 1.158\n", + "Epoch 470 Batch 0/5 train_loss = 1.145\n", + "Epoch 472 Batch 0/5 train_loss = 1.160\n", + "Epoch 474 Batch 0/5 train_loss = 1.123\n", + "Epoch 476 Batch 0/5 train_loss = 1.118\n", + "Epoch 478 Batch 0/5 train_loss = 1.103\n", + "Epoch 480 Batch 0/5 train_loss = 1.088\n", + "Epoch 482 Batch 0/5 train_loss = 1.089\n", + "Epoch 484 Batch 0/5 train_loss = 1.094\n", + "Epoch 486 Batch 0/5 train_loss = 1.092\n", + "Epoch 488 Batch 0/5 train_loss = 1.106\n", + "Epoch 490 Batch 0/5 train_loss = 1.053\n", + "Epoch 492 Batch 0/5 train_loss = 1.052\n", + "Epoch 494 Batch 0/5 train_loss = 1.046\n", + "Epoch 496 Batch 0/5 train_loss = 1.030\n", + "Epoch 498 Batch 0/5 train_loss = 1.021\n", + "Epoch 500 Batch 0/5 train_loss = 1.020\n", + "Epoch 502 Batch 0/5 train_loss = 1.046\n", + "Epoch 504 Batch 0/5 train_loss = 1.040\n", + "Epoch 506 Batch 0/5 train_loss = 1.026\n", + "Epoch 508 Batch 0/5 train_loss = 0.982\n", + "Epoch 510 Batch 0/5 train_loss = 0.969\n", + "Epoch 512 Batch 0/5 train_loss = 0.962\n", + "Epoch 514 Batch 0/5 train_loss = 0.946\n", + "Epoch 516 Batch 0/5 train_loss = 0.941\n", + "Epoch 518 Batch 0/5 train_loss = 0.951\n", + "Epoch 520 Batch 0/5 train_loss = 0.945\n", + "Epoch 522 Batch 0/5 train_loss = 0.952\n", + "Epoch 524 Batch 0/5 train_loss = 0.931\n", + "Epoch 526 Batch 0/5 train_loss = 0.905\n", + "Epoch 528 Batch 0/5 train_loss = 0.893\n", + "Epoch 530 Batch 0/5 train_loss = 0.881\n", + "Epoch 532 Batch 0/5 train_loss = 0.882\n", + "Epoch 534 Batch 0/5 train_loss = 0.871\n", + "Epoch 536 Batch 0/5 train_loss = 0.904\n", + "Epoch 538 Batch 0/5 train_loss = 0.893\n", + "Epoch 540 Batch 0/5 train_loss = 0.884\n", + "Epoch 542 Batch 0/5 train_loss = 0.864\n", + "Epoch 544 Batch 0/5 train_loss = 0.854\n", + "Epoch 546 Batch 0/5 train_loss = 0.854\n", + "Epoch 548 Batch 0/5 train_loss = 0.836\n", + "Epoch 550 Batch 0/5 train_loss = 0.816\n", + "Epoch 552 Batch 0/5 train_loss = 0.829\n", + "Epoch 554 Batch 0/5 train_loss = 0.813\n", + "Epoch 556 Batch 0/5 train_loss = 0.798\n", + "Epoch 558 Batch 0/5 train_loss = 0.808\n", + "Epoch 560 Batch 0/5 train_loss = 0.789\n", + "Epoch 562 Batch 0/5 train_loss = 0.791\n", + "Epoch 564 Batch 0/5 train_loss = 0.779\n", + "Epoch 566 Batch 0/5 train_loss = 0.765\n", + "Epoch 568 Batch 0/5 train_loss = 0.746\n", + "Epoch 570 Batch 0/5 train_loss = 0.746\n", + "Epoch 572 Batch 0/5 train_loss = 0.733\n", + "Epoch 574 Batch 0/5 train_loss = 0.733\n", + "Epoch 576 Batch 0/5 train_loss = 0.752\n", + "Epoch 578 Batch 0/5 train_loss = 0.727\n", + "Epoch 580 Batch 0/5 train_loss = 0.712\n", + "Epoch 582 Batch 0/5 train_loss = 0.711\n", + "Epoch 584 Batch 0/5 train_loss = 0.708\n", + "Epoch 586 Batch 0/5 train_loss = 0.695\n", + "Epoch 588 Batch 0/5 train_loss = 0.699\n", + "Epoch 590 Batch 0/5 train_loss = 0.688\n", + "Epoch 592 Batch 0/5 train_loss = 0.682\n", + "Epoch 594 Batch 0/5 train_loss = 0.703\n", + "Epoch 596 Batch 0/5 train_loss = 0.681\n", + "Epoch 598 Batch 0/5 train_loss = 0.672\n", + "Epoch 600 Batch 0/5 train_loss = 0.678\n", + "Epoch 602 Batch 0/5 train_loss = 0.657\n", + "Epoch 604 Batch 0/5 train_loss = 0.652\n", + "Epoch 606 Batch 0/5 train_loss = 0.627\n", + "Epoch 608 Batch 0/5 train_loss = 0.623\n", + 
"Epoch 610 Batch 0/5 train_loss = 0.633\n", + "Epoch 612 Batch 0/5 train_loss = 0.608\n", + "Epoch 614 Batch 0/5 train_loss = 0.614\n", + "Epoch 616 Batch 0/5 train_loss = 0.620\n", + "Epoch 618 Batch 0/5 train_loss = 0.610\n", + "Epoch 620 Batch 0/5 train_loss = 0.596\n", + "Epoch 622 Batch 0/5 train_loss = 0.596\n", + "Epoch 624 Batch 0/5 train_loss = 0.605\n", + "Epoch 626 Batch 0/5 train_loss = 0.574\n", + "Epoch 628 Batch 0/5 train_loss = 0.581\n", + "Epoch 630 Batch 0/5 train_loss = 0.571\n", + "Epoch 632 Batch 0/5 train_loss = 0.563\n", + "Epoch 634 Batch 0/5 train_loss = 0.582\n", + "Epoch 636 Batch 0/5 train_loss = 0.579\n", + "Epoch 638 Batch 0/5 train_loss = 0.562\n", + "Epoch 640 Batch 0/5 train_loss = 0.549\n", + "Epoch 642 Batch 0/5 train_loss = 0.540\n", + "Epoch 644 Batch 0/5 train_loss = 0.520\n", + "Epoch 646 Batch 0/5 train_loss = 0.515\n", + "Epoch 648 Batch 0/5 train_loss = 0.509\n", + "Epoch 650 Batch 0/5 train_loss = 0.509\n", + "Epoch 652 Batch 0/5 train_loss = 0.527\n", + "Epoch 654 Batch 0/5 train_loss = 0.524\n", + "Epoch 656 Batch 0/5 train_loss = 0.509\n", + "Epoch 658 Batch 0/5 train_loss = 0.523\n", + "Epoch 660 Batch 0/5 train_loss = 0.502\n", + "Epoch 662 Batch 0/5 train_loss = 0.477\n", + "Epoch 664 Batch 0/5 train_loss = 0.473\n", + "Epoch 666 Batch 0/5 train_loss = 0.463\n", + "Epoch 668 Batch 0/5 train_loss = 0.457\n", + "Epoch 670 Batch 0/5 train_loss = 0.455\n", + "Epoch 672 Batch 0/5 train_loss = 0.459\n", + "Epoch 674 Batch 0/5 train_loss = 0.475\n", + "Epoch 676 Batch 0/5 train_loss = 0.471\n", + "Epoch 678 Batch 0/5 train_loss = 0.455\n", + "Epoch 680 Batch 0/5 train_loss = 0.443\n", + "Epoch 682 Batch 0/5 train_loss = 0.456\n", + "Epoch 684 Batch 0/5 train_loss = 0.440\n", + "Epoch 686 Batch 0/5 train_loss = 0.421\n", + "Epoch 688 Batch 0/5 train_loss = 0.413\n", + "Epoch 690 Batch 0/5 train_loss = 0.405\n", + "Epoch 692 Batch 0/5 train_loss = 0.401\n", + "Epoch 694 Batch 0/5 train_loss = 0.404\n", + "Epoch 696 Batch 0/5 train_loss = 0.400\n", + "Epoch 698 Batch 0/5 train_loss = 0.428\n", + "Epoch 700 Batch 0/5 train_loss = 0.451\n", + "Epoch 702 Batch 0/5 train_loss = 0.426\n", + "Epoch 704 Batch 0/5 train_loss = 0.410\n", + "Epoch 706 Batch 0/5 train_loss = 0.422\n", + "Epoch 708 Batch 0/5 train_loss = 0.398\n", + "Epoch 710 Batch 0/5 train_loss = 0.377\n", + "Epoch 712 Batch 0/5 train_loss = 0.368\n", + "Epoch 714 Batch 0/5 train_loss = 0.358\n", + "Epoch 716 Batch 0/5 train_loss = 0.352\n", + "Epoch 718 Batch 0/5 train_loss = 0.349\n", + "Epoch 720 Batch 0/5 train_loss = 0.344\n", + "Epoch 722 Batch 0/5 train_loss = 0.346\n", + "Epoch 724 Batch 0/5 train_loss = 0.345\n", + "Epoch 726 Batch 0/5 train_loss = 0.337\n", + "Epoch 728 Batch 0/5 train_loss = 0.345\n", + "Epoch 730 Batch 0/5 train_loss = 0.348\n", + "Epoch 732 Batch 0/5 train_loss = 0.358\n", + "Epoch 734 Batch 0/5 train_loss = 0.346\n", + "Epoch 736 Batch 0/5 train_loss = 0.337\n", + "Epoch 738 Batch 0/5 train_loss = 0.329\n", + "Epoch 740 Batch 0/5 train_loss = 0.320\n", + "Epoch 742 Batch 0/5 train_loss = 0.323\n", + "Epoch 744 Batch 0/5 train_loss = 0.316\n", + "Epoch 746 Batch 0/5 train_loss = 0.304\n", + "Epoch 748 Batch 0/5 train_loss = 0.299\n", + "Epoch 750 Batch 0/5 train_loss = 0.292\n", + "Epoch 752 Batch 0/5 train_loss = 0.288\n", + "Epoch 754 Batch 0/5 train_loss = 0.289\n", + "Epoch 756 Batch 0/5 train_loss = 0.284\n", + "Epoch 758 Batch 0/5 train_loss = 0.290\n", + "Epoch 760 Batch 0/5 train_loss = 0.304\n", + "Epoch 762 Batch 0/5 train_loss = 0.311\n", + "Epoch 764 
Batch 0/5 train_loss = 0.405\n", + "Epoch 766 Batch 0/5 train_loss = 0.390\n", + "Epoch 768 Batch 0/5 train_loss = 0.344\n", + "Epoch 770 Batch 0/5 train_loss = 0.320\n", + "Epoch 772 Batch 0/5 train_loss = 0.280\n", + "Epoch 774 Batch 0/5 train_loss = 0.265\n", + "Epoch 776 Batch 0/5 train_loss = 0.258\n", + "Epoch 778 Batch 0/5 train_loss = 0.252\n", + "Epoch 780 Batch 0/5 train_loss = 0.247\n", + "Epoch 782 Batch 0/5 train_loss = 0.243\n", + "Epoch 784 Batch 0/5 train_loss = 0.240\n", + "Epoch 786 Batch 0/5 train_loss = 0.237\n", + "Epoch 788 Batch 0/5 train_loss = 0.233\n", + "Epoch 790 Batch 0/5 train_loss = 0.231\n", + "Epoch 792 Batch 0/5 train_loss = 0.229\n", + "Epoch 794 Batch 0/5 train_loss = 0.225\n", + "Epoch 796 Batch 0/5 train_loss = 0.230\n", + "Epoch 798 Batch 0/5 train_loss = 0.226\n", + "Epoch 800 Batch 0/5 train_loss = 0.222\n", + "Epoch 802 Batch 0/5 train_loss = 0.237\n", + "Epoch 804 Batch 0/5 train_loss = 0.225\n", + "Epoch 806 Batch 0/5 train_loss = 0.225\n", + "Epoch 808 Batch 0/5 train_loss = 0.245\n", + "Epoch 810 Batch 0/5 train_loss = 0.227\n", + "Epoch 812 Batch 0/5 train_loss = 0.210\n", + "Epoch 814 Batch 0/5 train_loss = 0.206\n", + "Epoch 816 Batch 0/5 train_loss = 0.202\n", + "Epoch 818 Batch 0/5 train_loss = 0.198\n", + "Epoch 820 Batch 0/5 train_loss = 0.195\n", + "Epoch 822 Batch 0/5 train_loss = 0.192\n", + "Epoch 824 Batch 0/5 train_loss = 0.189\n", + "Epoch 826 Batch 0/5 train_loss = 0.189\n", + "Epoch 828 Batch 0/5 train_loss = 0.187\n", + "Epoch 830 Batch 0/5 train_loss = 0.186\n", + "Epoch 832 Batch 0/5 train_loss = 0.187\n", + "Epoch 834 Batch 0/5 train_loss = 0.189\n", + "Epoch 836 Batch 0/5 train_loss = 0.189\n", + "Epoch 838 Batch 0/5 train_loss = 0.197\n", + "Epoch 840 Batch 0/5 train_loss = 0.207\n", + "Epoch 842 Batch 0/5 train_loss = 0.196\n", + "Epoch 844 Batch 0/5 train_loss = 0.187\n", + "Epoch 846 Batch 0/5 train_loss = 0.197\n", + "Epoch 848 Batch 0/5 train_loss = 0.189\n", + "Epoch 850 Batch 0/5 train_loss = 0.176\n", + "Epoch 852 Batch 0/5 train_loss = 0.171\n", + "Epoch 854 Batch 0/5 train_loss = 0.164\n", + "Epoch 856 Batch 0/5 train_loss = 0.161\n", + "Epoch 858 Batch 0/5 train_loss = 0.157\n", + "Epoch 860 Batch 0/5 train_loss = 0.154\n", + "Epoch 862 Batch 0/5 train_loss = 0.152\n", + "Epoch 864 Batch 0/5 train_loss = 0.150\n", + "Epoch 866 Batch 0/5 train_loss = 0.148\n", + "Epoch 868 Batch 0/5 train_loss = 0.146\n", + "Epoch 870 Batch 0/5 train_loss = 0.145\n", + "Epoch 872 Batch 0/5 train_loss = 0.145\n", + "Epoch 874 Batch 0/5 train_loss = 0.142\n", + "Epoch 876 Batch 0/5 train_loss = 0.143\n", + "Epoch 878 Batch 0/5 train_loss = 0.159\n", + "Epoch 880 Batch 0/5 train_loss = 0.145\n", + "Epoch 882 Batch 0/5 train_loss = 0.161\n", + "Epoch 884 Batch 0/5 train_loss = 0.211\n", + "Epoch 886 Batch 0/5 train_loss = 0.196\n", + "Epoch 888 Batch 0/5 train_loss = 0.335\n", + "Epoch 890 Batch 0/5 train_loss = 0.325\n", + "Epoch 892 Batch 0/5 train_loss = 0.279\n", + "Epoch 894 Batch 0/5 train_loss = 0.244\n", + "Epoch 896 Batch 0/5 train_loss = 0.214\n", + "Epoch 898 Batch 0/5 train_loss = 0.174\n", + "Epoch 900 Batch 0/5 train_loss = 0.147\n", + "Epoch 902 Batch 0/5 train_loss = 0.138\n", + "Epoch 904 Batch 0/5 train_loss = 0.131\n", + "Epoch 906 Batch 0/5 train_loss = 0.128\n", + "Epoch 908 Batch 0/5 train_loss = 0.125\n", + "Epoch 910 Batch 0/5 train_loss = 0.123\n", + "Epoch 912 Batch 0/5 train_loss = 0.121\n", + "Epoch 914 Batch 0/5 train_loss = 0.119\n", + "Epoch 916 Batch 0/5 train_loss = 0.117\n", + "Epoch 918 Batch 0/5 
train_loss = 0.116\n", + "Epoch 920 Batch 0/5 train_loss = 0.114\n", + "Epoch 922 Batch 0/5 train_loss = 0.113\n", + "Epoch 924 Batch 0/5 train_loss = 0.112\n", + "Epoch 926 Batch 0/5 train_loss = 0.111\n", + "Epoch 928 Batch 0/5 train_loss = 0.109\n", + "Epoch 930 Batch 0/5 train_loss = 0.108\n", + "Epoch 932 Batch 0/5 train_loss = 0.107\n", + "Epoch 934 Batch 0/5 train_loss = 0.106\n", + "Epoch 936 Batch 0/5 train_loss = 0.105\n", + "Epoch 938 Batch 0/5 train_loss = 0.103\n", + "Epoch 940 Batch 0/5 train_loss = 0.102\n", + "Epoch 942 Batch 0/5 train_loss = 0.101\n", + "Epoch 944 Batch 0/5 train_loss = 0.100\n", + "Epoch 946 Batch 0/5 train_loss = 0.099\n", + "Epoch 948 Batch 0/5 train_loss = 0.098\n", + "Epoch 950 Batch 0/5 train_loss = 0.097\n", + "Epoch 952 Batch 0/5 train_loss = 0.096\n", + "Epoch 954 Batch 0/5 train_loss = 0.095\n", + "Epoch 956 Batch 0/5 train_loss = 0.094\n", + "Epoch 958 Batch 0/5 train_loss = 0.094\n", + "Epoch 960 Batch 0/5 train_loss = 0.093\n", + "Epoch 962 Batch 0/5 train_loss = 0.092\n", + "Epoch 964 Batch 0/5 train_loss = 0.091\n", + "Epoch 966 Batch 0/5 train_loss = 0.090\n", + "Epoch 968 Batch 0/5 train_loss = 0.089\n", + "Epoch 970 Batch 0/5 train_loss = 0.088\n", + "Epoch 972 Batch 0/5 train_loss = 0.088\n", + "Epoch 974 Batch 0/5 train_loss = 0.087\n", + "Epoch 976 Batch 0/5 train_loss = 0.086\n", + "Epoch 978 Batch 0/5 train_loss = 0.085\n", + "Epoch 980 Batch 0/5 train_loss = 0.084\n", + "Epoch 982 Batch 0/5 train_loss = 0.083\n", + "Epoch 984 Batch 0/5 train_loss = 0.083\n", + "Epoch 986 Batch 0/5 train_loss = 0.082\n", + "Epoch 988 Batch 0/5 train_loss = 0.081\n", + "Epoch 990 Batch 0/5 train_loss = 0.080\n", + "Epoch 992 Batch 0/5 train_loss = 0.080\n", + "Epoch 994 Batch 0/5 train_loss = 0.079\n", + "Epoch 996 Batch 0/5 train_loss = 0.078\n", + "Epoch 998 Batch 0/5 train_loss = 0.078\n", + "Model Trained and Saved\n" + ] + } + ], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "batches = get_batches(int_text, batch_size, seq_length)\n", + "\n", + "with tf.Session(graph=train_graph) as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + "\n", + " for epoch_i in range(num_epochs):\n", + " state = sess.run(initial_state, {input_text: batches[0][0]})\n", + "\n", + " for batch_i, (x, y) in enumerate(batches):\n", + " feed = {\n", + " input_text: x,\n", + " targets: y,\n", + " initial_state: state,\n", + " lr: learning_rate}\n", + " train_loss, state, _ = sess.run([cost, final_state, train_op], feed)\n", + "\n", + " # Show every batches\n", + " if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:\n", + " print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(\n", + " epoch_i,\n", + " batch_i,\n", + " len(batches),\n", + " train_loss))\n", + "\n", + " # Save Model\n", + " saver = tf.train.Saver()\n", + " saver.save(sess, save_dir)\n", + " print('Model Trained and Saved')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Save Parameters\n", + "Save `seq_length` and `save_dir` for generating a new TV script." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 198, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "# Save parameters for checkpoint\n", + "helper.save_params((seq_length, save_dir))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# Checkpoint" + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL\n", + "\"\"\"\n", + "import tensorflow as tf\n", + "import numpy as np\n", + "import helper\n", + "import problem_unittests as tests\n", + "\n", + "_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()\n", + "seq_length, load_dir = helper.load_params()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Implement Generate Functions\n", + "### Get Tensors\n", + "Get tensors from `loaded_graph` using the function [`get_tensor_by_name()`](https://www.tensorflow.org/api_docs/python/tf/Graph#get_tensor_by_name). Get the tensors using the following names:\n", + "- \"input:0\"\n", + "- \"initial_state:0\"\n", + "- \"final_state:0\"\n", + "- \"probs:0\"\n", + "\n", + "Return the tensors in the following tuple `(InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)` " + ] + }, + { + "cell_type": "code", + "execution_count": 273, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def get_tensors(loaded_graph):\n", + " \"\"\"\n", + " Get input, initial state, final state, and probabilities tensor from \n", + " :param loaded_graph: TensorFlow graph loaded from file\n", + " :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)\n", + " \"\"\"\n", + " \n", + " t_input = loaded_graph.get_tensor_by_name('input:0')\n", + " t_initial_state = loaded_graph.get_tensor_by_name('initial_state:0')\n", + " t_final_state = loaded_graph.get_tensor_by_name('final_state:0')\n", + " t_probs = loaded_graph.get_tensor_by_name('probs:0')\n", + " return t_input, t_initial_state, t_final_state, t_probs\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_get_tensors(get_tensors)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "### Choose Word\n", + "Implement the `pick_word()` function to select the next word using `probabilities`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 274, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tests Passed\n" + ] + } + ], + "source": [ + "def pick_word(probabilities, int_to_vocab):\n", + " \"\"\"\n", + " Pick the next word in the generated text\n", + " :param probabilities: Probabilites of the next word\n", + " :param int_to_vocab: Dictionary of word ids as the keys and words as the values\n", + " :return: String of the predicted word\n", + " \"\"\"\n", + " \n", + " word = int_to_vocab[np.argmax(probabilities)]\n", + " \n", + " return word\n", + "\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "tests.test_pick_word(pick_word)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "## Generate TV Script\n", + "This will generate the TV script for you. Set `gen_length` to the length of TV script you want to generate." + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": { + "collapsed": false, + "deletable": true, + "editable": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moe_szyslak: sizes good-looking slap detective_homer_simpson: takin' cesss planning parrot smoke parrot sizes frustrated choked slap gesture elmo's jerry duff's butterball officials sizes themselves gesture whiny irrelevant paintings continuing huddle tony butterball worst jerry neighborhood slap slap slap detective_homer_simpson: meatpies crooks sail slap slap slap sizes worst mr slap worst gesture parrot calendars bathed schnapps butterball stuck jerry dash my-y-y-y-y-y slap slap slap detective_homer_simpson: rain gesture bashir's jerry longest slap slap slap detective_homer_simpson: realize gesture parrot neighborhood jerry dad's poet presided scrutinizes presided rope neighborhood booth detective_homer_simpson: enjoyed gesture electronic sam: jerry dash my-y-y-y-y-y butterball protestantism dash my-y-y-y-y-y friendly dash happiness agreement slap protestantism muttering muttering sugar-free parrot is: abandon fudd scrutinizes detective_homer_simpson: itself duff's butterball drinker slap muttering shaky slap cuff giant face knockin' tv-station_announcer: that's slap detective_homer_simpson: celebrate rubbed 2nd_voice_on_transmitter: further rubbed usual laramie bunch slap detective_homer_simpson: itself gesture child jerry premise poet sarcastic slap detective_homer_simpson: meatpies skydiving scrutinizes scream renee: scrutinizes detective_homer_simpson: itself lenses butterball tapered smokin' 2nd_voice_on_transmitter: slap detective_homer_simpson: detective_homer_simpson: detective_homer_simpson: aims always butterball oh-so-sophisticated wine dislike sizes bury gang butterball renee: rope laramie themselves beings slap detective_homer_simpson: rain indicates butterball stunned slap detective_homer_simpson: rain arts butterball ratted 2nd_voice_on_transmitter: pepsi oh-so-sophisticated planning booth rope presided rope abandon worst\n" + ] + } + ], + "source": [ + "gen_length = 200\n", + "# homer_simpson, moe_szyslak, or Barney_Gumble\n", + "prime_word = 'moe_szyslak'\n", + "\n", + "\"\"\"\n", + "DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE\n", + "\"\"\"\n", + "loaded_graph = tf.Graph()\n", + "with tf.Session(graph=loaded_graph) as sess:\n", + " # Load saved model\n", + " loader = 
tf.train.import_meta_graph(load_dir + '.meta')\n", + " loader.restore(sess, load_dir)\n", + "\n", + " # Get Tensors from loaded model\n", + " input_text, initial_state, final_state, probs = get_tensors(loaded_graph)\n", + "\n", + " # Sentences generation setup\n", + " gen_sentences = [prime_word + ':']\n", + " prev_state = sess.run(initial_state, {input_text: np.array([[1]])})\n", + "\n", + " # Generate sentences\n", + " for n in range(gen_length):\n", + " # Dynamic Input\n", + " dyn_input = [[vocab_to_int[word] for word in gen_sentences[-seq_length:]]]\n", + " dyn_seq_length = len(dyn_input[0])\n", + "\n", + " # Get Prediction\n", + " probabilities, prev_state = sess.run(\n", + " [probs, final_state],\n", + " {input_text: dyn_input, initial_state: prev_state})\n", + " \n", + " pred_word = pick_word(probabilities[dyn_seq_length-1], int_to_vocab)\n", + "\n", + " gen_sentences.append(pred_word)\n", + " \n", + " # Remove tokens\n", + " tv_script = ' '.join(gen_sentences)\n", + " for key, token in token_dict.items():\n", + " ending = ' ' if key in ['\\n', '(', '\"'] else ''\n", + " tv_script = tv_script.replace(' ' + token.lower(), key)\n", + " tv_script = tv_script.replace('\\n ', '\\n')\n", + " tv_script = tv_script.replace('( ', '(')\n", + " \n", + " print(tv_script)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": true, + "editable": true + }, + "source": [ + "# The TV Script is Nonsensical\n", + "It's ok if the TV script doesn't make any sense. We trained on less than a megabyte of text. In order to get good results, you'll have to use a smaller vocabulary or get more data. Luckily there's more data! As we mentioned at the beginning of this project, this is a subset of [another dataset](https://www.kaggle.com/wcukierski/the-simpsons-by-the-data). We didn't have you train on all the data, because that would take too long. However, you are free to train your neural network on all the data. After you complete the project, of course.\n", + "# Submitting This Project\n", + "When submitting this project, make sure to run all the cells before saving the notebook. Save the notebook file as \"dlnd_tv_script_generation.ipynb\" and save it as an HTML file under \"File\" -> \"Download as\". Include the \"helper.py\" and \"problem_unittests.py\" files in your submission." 
+ ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + }, + "toc": { + "colors": { + "hover_highlight": "#DAA520", + "running_highlight": "#FF0000", + "selected_highlight": "#FFD700" + }, + "moveMenuLeft": true, + "nav_menu": { + "height": "511px", + "width": "251px" + }, + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 4, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false + }, + "widgets": { + "state": {}, + "version": "1.1.2" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/helper.py b/helper.py new file mode 100644 index 0000000..eec8857 --- /dev/null +++ b/helper.py @@ -0,0 +1,55 @@ +import os +import pickle + + +def load_data(path): + """ + Load Dataset from File + """ + input_file = os.path.join(path) + with open(input_file, "r") as f: + data = f.read() + + return data + + +def preprocess_and_save_data(dataset_path, token_lookup, create_lookup_tables): + """ + Preprocess Text Data + """ + text = load_data(dataset_path) + + # Ignore notice, since we don't use it for analysing the data + text = text[81:] + + token_dict = token_lookup() + for key, token in token_dict.items(): + text = text.replace(key, ' {} '.format(token)) + + text = text.lower() + text = text.split() + + vocab_to_int, int_to_vocab = create_lookup_tables(text) + int_text = [vocab_to_int[word] for word in text] + pickle.dump((int_text, vocab_to_int, int_to_vocab, token_dict), open('preprocess.p', 'wb')) + + +def load_preprocess(): + """ + Load the Preprocessed Training data and return them in batches of or less + """ + return pickle.load(open('preprocess.p', mode='rb')) + + +def save_params(params): + """ + Save parameters to file + """ + pickle.dump(params, open('params.p', 'wb')) + + +def load_params(): + """ + Load parameters from file + """ + return pickle.load(open('params.p', mode='rb')) diff --git a/logs/1/events.out.tfevents.1490895533.ip-172-31-18-64 b/logs/1/events.out.tfevents.1490895533.ip-172-31-18-64 new file mode 100644 index 0000000000000000000000000000000000000000..e4b0945f1fb4cb75c9085de65b3ae3c06d12d3da GIT binary patch literal 582955 zcmeFa3z!_&Ss*&qJ(|Za$+AkaEtg|EmL21C9wW_2wlgGgBtK*&mf~27oS4Mzp6Qm< zQBU`{yT_7j_F`aRcY`;Cn?MrY+$@3n5nfq#VJ{)!0?Az<;jut?U!FH%fqa*bKp=!m z?tiN4oI0n@qfS+wDw$pUeb)5!bl3Uc=Rg1J?3;=Hd;eFxZvW!U%xiw&;k!O{B-pt& z?(~N*zyGP;aJstuu0MO<+~;na4L|rb-}Ux?zxemJY=ocN|LLE<8lHe{{o&@;)ZhN_ zptIgPKN@s<rYi{zKd`40JIb*6Oyu4<3ArudC^_~yjF30|-o_}dr0 zME~=IUtkU9r=9V+-gF|ZkvvvdF#j44!_GmkGamMb=i1}Wv}d=H*V-pxGY7M)jeQ=x z7Pg%Vk&b=@5tUl=;GGVM5=h@%fVtIKzp>D0T?GqBqIOKcCcG8)Os3<0x7Te?2L1Kk z(q!6Mzu<4XcQl+#Tlyz?JRh2^Yhl|{oxxVm-+6TVLenQ}d8^(GwBZ`XQCAmoyi3sg`l+ucJ>B?{^lfkbEb7Qz*_HIe@H(zLq0a_T@=1MLxi9=>?R+& zRuY(5K!t+iFdvG;662Ro`Mh{^9{D`8>OGJagtQJuk7}c$hLSb=Fv?dTk~E;pBxnQ7 z5_}QW8S>^ye9K>N5z9vuq}L| zKl?y`FbE>}1Adj(mr2jL2KKD(7K{w4!UGg94_*h0AsyNW6QWu`(`+dy59=|1P@j}! 
zO_oY%7+Z;^iI^>p`tS(s49S2H3#8@DqNgROn*Q)+n2(=YpL7mjz)eeF*#2P%*gT_D@POJNq&zo@ZlI%r?hnh_-6!ptDcv`vmnS=eI?qw*XG)HAds!3~TaF=W zs-u7>b245aOIRAbS7vfs3`;O!(Ew+^ge)w}mckOFo0bVi1>GN()joVK?%I0%>H~x% zS}=l}?%!DJb-Q?|T^e_W-O)ySYuGQK7dG}UJQK{!Jd1~Z zgXW7|N8x#acEOGQ(B$qv`%$g7aWYMW$FM}_5?zA-j`qPSuk6`-;QKweU5myeAsOEo zQPJNi{2FACyY)h8PrC|stb*TM_~Ie_=h@J3PkO!Xa(JQ=kidqHNSakYb`KR%=Fj(P)oZQ91q@Y0%2oo;5Scf4FXGo8h}A;Ea=C;`g#xc>6qv@ zk950+r4VYwRU38117+?4y!@u+3 zI{EFTaj(;zKu-lq9MZ8}!|uxBiyjPMKSvO< zlHd!APN1F@;>iH{uXX~5a9j3j*&d-S94KEl>t(u!*6aOT172W4Fy$-qd}K@xEEp2l zpmosdQ+Wf=mZuNp$t8Wf(S*3N*hUXt)Iw;a&7c6HxWXpfZsHnUFA+>zfzT*I!+J#~ znw-mKJOpW0^xzW>xXFy9m2esbj}j`dB&X%)bRw#hF^x=-EvZZ*B_+D>Cb(9jCT2Fw zF;Rhu8bIMWagT~jGD*rsF@B?c2{b@awOmJ~E5HVoAQMpzofcw?`@WEL8HD3ly^Q|LO+@Zc#a zEsA2sC;iu$;DHe`hpf#=581FAw5+A+qBJssYZa(jOOi=^8biO;G!m_*vY=&cDz~Hx zP+&YpW0H6`+XF3YX|74l+GP!Ei)W&ATAPuWD#u!ys4Bu*lEz}n%JZ1A&)~5%w5)ZQ z)zY$-X1Ua?UD2?X=D=c{J!|!aw@KC`JpT{}EPN60y8=5l8pzj_PDd1|OlLEW&{Ahp z5FfpAk~6UC3^bGJ`mu;GT~Ym@97Qr&?k!!BIh!W51pGCi!7pVwo9U=|TCievn@=z_ zN0H%1QuGd5ey&n?)AIO~b~iH)vh5Y|pswKyZ^%vVf|n_z@p{RLNDvE?{HQ_8J`vFs zYZTIz)QPB=kj8*#2p>f+OGxSGmZhAIrDW;OLAt$PdB{R;%e88t+mH_>ry@a2bW1Ag zb>M%M9m>dMJQTap=ELoZ#@j;B47*0?VMR`MYP$|ICfY6ekXUF^iMNG#hlM9;p^T}b z*^*<(taoDSuXylA*poUmO#2x8j}v_|%dEV~Cj1>nw^PqO-5X9u<0Ip7=d!<(`{m3^ zU}kb?xRW#mO8Pn}y=`~SPJ82a7`!JzXdiZTFJtDSzuunopT_*f9W$+)r0Dn#fT(7c z)ZDgKVfQoytKA`RMTw6W`M0J(r_Uva_P7-J0H##1RI|5%B_napA}Ztkd(3a8zRn?w zmNh5tF40wk%x@oF>lG@J+~%jFmpnqkNF$1@|Z+YEF#|2hcl?K*k<|?-xom%fqaT8sV-#WA6?^V4kIUwAJu+ZB=bwi4)Px_K0AUu4P zg<15K=}W8-;6-q&e@j>hpr=UtApETCs9ZeXAM}>)A0lD=ZQ+kV0c;p09kfbv^7M`S!t}H)!_}fX=j!Ejw7d^FmTLflVP;)`9!za2mQd)D*z4 z$W8}`1=(OCJPe)*QdhBpwq2){7+<;5xWQ(TY|CP&72z zfUZ~px{?#nsE-4g9`QPZqG&a;0X<{|=%GphMH7$>=wT~B59b6lC8NLQ!Ocn!VUF=WS&--)HPa}?I_*ySx{xOz``Zufmv(rt@cfjqc9YF3+hAiY(tdLu&3b~a^kvn9C+#ze^N-!cX*AHcK z?ywbdhqI8Y5vRk6&h<4GgZ5u?wfJhQ5Sf-F9zfV#diIbYSuqwyZ)hS)<0~!{^3IFzpDW+Rqo+j zDC-gIH#xH+dSU6!k^4AqSB4X-K1AOr6{E4(s6-fJ5!@@Ep*{FW174noDoMo3T|83( z7c)sV23o{cV{l55T+e0r?uJy$9=Qn|RFPQuyQQ~+ToS3-Gnkdk$;j3nl4-sc_OALu z6x*lGj7gd|ul&J(_uvuYXk$0twBN>8?RCt#ERlcO2bR`46HE*bCew}R?Tfwsx%1P> z((z~}115~e#+KLnnBTSvcl&pSj&ttSTI$Bxk7EzhK5S^>6S1Z`o(BA`2M_07-GnRG zZR-Y8xj5p7C;!h+@fd1@_!VH6v(dLg61)mN)quyG@(y!X*r}~)F|I%*sMTy58Z$rc zp-R6utWT%S$1OKhF`!oD8j*D_iSk@5j_OTnCOM&AjVv8~L03DH8v%kUTT@#tL zd{?Lxj>&CqEvKUIM%*b|;m;cIkW1-~n%W)9AvVH0ej-Y?8$#+Ew<*IIssCRO4(2Ac zWrsdpOfX}q?tJDTz9Kd&b zaA$t{rsfjlr*E<;WJ*8$ga>DoM>IQsI_+O$odVXDEQL*pQ88(7)8oqL(}RVg%}(ng z@_u9!KHh*gC?8!0bbkQ@`iR1rWvRqlbMn%}k0^fam7pR5+a9q#9;Q;nOp#DO^n(p} z)FB~Hbfzb^24b9}pcE}TGr(oG0DXYg6kLWMYrxU+r19rt8adq<;=RTh>vC9owr^hp z+i~<)r%eV8cBcNEKYQ}QNAHCX;!OM!WSeXb`qQQG-*&f8=T24Uh5kkZUQ-ra?1Lsy z2M+NL9Ecv%^41g5o_P`;va3e0`&0|Jwc*xAJLDVIJC6jF^R@T7XGE<-t8AusGcr zp7&?TzoobkDOyrTX3Y+Gw%j&>LWUd&XQVn|W>A)x1z7mS(iWYNfXrkqVVIXOp4{ z7V&W+yM}yEtRu#vi6U5J6ipO)B&k<)OlmCVtDn@Sq6sZs$|W>XWfRme)@3?PMH5<# z)5#5OQeXQ7ebI#WD+W_y%yYO=Q+1*vlS{^;2|cQ*bVq*rCaRMSVy>bI8D^Sc`S2;yC zh*7FW%#dlzDdfk|ig%(Q*;{8tjSR*-S*~&l8NswRMf56APLciSsJbafIYss(dF~P_ z)n87bfH9+-B2OM%_jFT{gjl~|%0nB=DRfJh^FAspr_cdxET_(@@#&QZRYAOFBH#N2_iewcdC3GB-$>$|+>9!a#fea*D$KX|8e# z4Y>KsDKrmC@XnDnl%Ex|HsRhA&+~E$8HHTs6mkY!3zbvIkBJnm@^T7!VG?P`T}~lChA}fo zIfeWbLd*gz%vDYyuNu|?FL*>2Ck%V0wkwNKavau_n0ra!`EF(X6ZdHyjeI}- zA-p|8T?Cf?pqAoU^G?bdwah^FL9IhvjKm&%rF8j5)Up`*qPDfr^4%Z$$`<~Vhu1)) zufSugN85~gs28n|)r>a`Vb@YBcHf$K?bm*#a0!EG{hh+E(j_tsUMjWP)C##Q@V4v) zafeAQSy%-qNLpsV|857;avCIiL6^trM7v*pC)yI$6_>DvTaER`#^`NJ*F(i&l{cfJ zUWFxH54(i5C|%o{sjl~5>QDVy@^4@oi%J=!*U_G4tSWgl@%_Rj)JBO8|FaqWH#IGT zw`gWBlVkolRW&fKiWfX)uUe#d*0?I(VU@k=Aq 
zaVoskh<^iTiQ(-T{1~=V;ak$i#(ha6;e?EMD$;c#B2l*u|uzY@WmkeLZkdt{f z*U-SvG+-r*Ivh2Rb*ATgV^otpiZ_jlV#fosK5?SqwGK%!`2Fz0)tkimgxeRsME~>r z#=--w+tRnc?NRhM<`uEAG#Hmv0W59y#BHWdZR_w_kIt0kmAzqCG&4PSj`yh_k*>ko( z>UP`xgAihRY?LdU)b=f2uD6SAxAz{W1aq0T2R(<2}-J{5Fe|rdc;3U z0)kh_qy%IwLqOI7ff}lQ0CjmD+i8suW^-l@1I8(dSj&(Iu9L{(`9>0wEqG@v4yl4+ z)6N{EAifxt%AvTkojD$ZW?3$d%oM&HPCsLM1bu)QNQ?+ZhEIe#4Nycv+i5(wI%OxE zhZjXlQ7@=H9kmA|yf!L^Aaae^g~5C1VFZpY;9+EDwSiBwIh4Z>d2p-6QnCBFOSrrl zZwyMku^iaJO)w?SZg|&Rc;UMtp%(6#%z^(tye!BFaN@9W>@r6Sp?>AepE<0JGRKctK%7 zwZT|LB&L}_i&xTFK}#XX12!CkZKt-@*f1d8hsy)52X{jgALNsNf{*|~^k7OD?bF~9 z*nVozU+*og_0IK&v0#(uP@zmNJ2Rl>@h3hw4G=vuy!AeswQBq_x6u(u;}L~Bk8Y1c z3Eryb!5R{aK#B|@KZ01CRstS@fLl319-p7wORYaCku`QjjTB2ybp~6#N&5hmq?;*J z*B;%=paL3!`I2XiADn>g@fzGHS}XIUkklD?66T`Eah*J+o;jLge2^B&E zoO}`&$9rP>n-gwvSyT>*qEed35+CCQ5>YX*NaUp&mWC{ibO|OZC719NqH@SeRANZd z5wT z1ykYm@p@r%6}2m>fEbqBbLK`Lt7^k6LBOAVpg$M{95V6ia3Q+Kd-;t%Y{DKE=?qU3 zp`&N%@9AYURC={{dVve*hARqY*+!KchHX2vK{S5Wgblb=L{G7pGqtu{j;S~&0VWY6 z(xKYZJospXi&M+AOsTty(OjHb{(i)Zz=nc0A6nLK8#=XC8-cW`Q0`EwokO-oc&ffp zl#kf)i7W=@sbb&Y2Fd~d=+)tAn)iAQ;3txZL!y(&MM!I5sn|G~p$Sq?5QLdOeHavk zb7#@4M(L&R_0apGNfsWq#QV$eY*T;QJjz^o9 zAMH=iKNN*>1!Jy5g!pHIsn(lyb`D;IuWP^?3jmZ2w12AwW`SB>(Kk_vG1ltbBOSS3 z1}|CNDIB?sC$OC`e|pea>kTFtY+0nd^z<^hEXt&bsKQ2&DU&4kqDB+D4Pwm;LbpLD zkTGZsi6R2|pD`sZLc#J;)yRd8x7Zg4Km9ZcQ#y8Wk!;&ul2^`A;hi9&F2sYG)%!h#{qUFXJ~ zZofCg`ha*Fh{S6*-SZZX&Y>$qlA@Xmw8eC36QfPsGsA~by7Pq85AU9jg!2f~eLef)-e&P1p z8wc`=rf!`W1#M`$)Vv?eT#1cCGce{^c%p}Hi@z=W5vXtqu1gRVS~H0JrH;sI8Jbz0 z!86#^$z!6*ZGSHM$@4S)QBuu?nxfSBL%M+&@)692@f8Foe74x zWRwZ?llLNQS1~!3ycdF|6J4s{P598QpyNWiOF#r7=sun-mZ|}ju1eE&cD2bP_#Q1Z zQ{8^Js(_^s#J2F0pHjG4r!xhJM zuZT&_$DOZ<2y-|+QN{D|Sd>uB5ub4LX&yt!Yiv zvO>03?vW+7q)N}D%!);AiE&K_wj}HMf(kg|lf55M%;w8D*{_IvDr7O`HHkD@SxiS% zwOtP}EaZ6@Fcjsmkmq53dKFSg=w;^zxROHq72R5k#53S5eId_7&86h|0-qe|QMI0j zjFEHCLtY}iCCA=IXf0fM9@(q%o3aDXBj18xpj z`LygW(711)0ZgYkm}ilquP75V>deLHDFUkXErbC(K)9$YD?i_x(26q zz2F*R!K5BO^L)+q`{#`XueB$ahwIr)eptn1ItoR>uIwjnKr11%n2e6|YLPGPDE~Er zm)b8!Owhff;be-YGA=Sn5^h|+Oqa^Qyd3qD#MiiHOjYx95f4ZA>3M)iY#6T zH>Ha##F`7nWNbIHTP#kLCK=OI`Y@eAMNg0}qOY_3Z4d6SqBQ1Xqa1fGV1p8D)L5a$ zK30qqHlFA!0zT|nQ+7g{laeA5Ym(%VDV@z03!WIuFfVTh#>w6}zlr~(`HvjaE=e=U z@E#9dZpHst`cS)B#@-sdG-O0FH9KO0{&(bsEGzEJjX`u>S=w= z2PN}Cjmm6k*6uK->s20p#$FskWsU?jd}*tsgeW-;zfOw9 z5f@C`P^J%CQE|GpIp{5&8;`cI!l}23b?_%guip$_#s$?4+di#@TgvtE_Cffj2D~Z< ze0U_r!pi}H5c^zkQpshmYuL>7tX+BaB{m!C`+TH@#3=F6p-pQYuD>wj>lpQ^(L$ zUMP5o>RLk&e#2mU(@Q72dO6V!w5&_77}dX~wsjNE%syO%+jXMG7C9BVDsi`)Tw0lE zM2_laa=XKeGFLv`AN25|cl8DA2B&YTbnXCXT0g17D^0B}CxK=_>**t5%znhuhFGyKk!;LDGU)d?_VCb%p>eWdKuR9zZU*0k6MZvD zn09^RBf*Qcjxz(1ZpAOL{wO+k?`Xg)EMbyeUe4jQHU`0RHh5_ocP7k@a2UGg0h#K0kmiI^qP1at)3{aVnkVigFX&bT3>xlCZ0^C;6Hi51^vSb zX6-@$LeF1(tcSgM58=z;QSq#5(dM>f@fkE9`R9o+ox}0TQyrQXi`xJg`J4}rdrx6A z%CR`X=kH3rQ5|(CuKCFXAMs#P3w7?lwAmSU@53gpCG6y)2gLGS$A8g)QOyYWkRTHj zx1GV2AiV7q)>P=7u#6nOzNrc4YoRt;$jCvjo4LUiF&6nXm+gq~Zw#&xHQ$71v`H>H zjhWls+0%2hPB+PLvk0=VWF8mS4p58&oBj!UdgF61c(YJHR{vEgp*GCqYhm?$SkUvWOI9<3%Y}EJpK- zIL}g#3k!f064)RwmGrqNieyIm`3F6dAV_*-#yrSkP6CwugjYdb0HXt@MUt>UG5T{S zTq`Mc5FPjc4s_Ma6SeGISRf1k92$@gKAENx;~v1B3DeX`A9~}1|9CTT5wzT zXYi>1cb>cjB_>3d*5VGd2D?BZ;bRYJXwwK55#>Yd6x)J3{M%Ef*z}4qRPa}d-7gz+ z;V^F_U_mkL?_@Z=6k7frb~s@u$|!26|Cl1FYCc$s9lc$CsA6WauZ zzVqLTS$ex{735E$U3j*^S1Tw8YhfB}WG#5P8K5+>32m*##!8iBBEUc4^eGEQ$PrM% zMsUG=1r_6E59TIArexjC49LsjrG86oKxC|wRGSC?)`J5H0azSQ)|)o@lz5iZ-s6-o zQ197FgB0O&!Zwu&N^Ikk!z5REqu_#0mV?~r7rZ%2aK;#g$PGwbEGg9okPP<^segR< zRBt?e7;omqoKcBdMoP=EE7j)0v5g?BrME8F&O#VySQVZEhG7mnt4SDXL6cK 

diff --git a/problem_unittests.py b/problem_unittests.py
new file mode 100644
index 0000000..63fe478
--- /dev/null
+++ b/problem_unittests.py
@@ -0,0 +1,296 @@
+import numpy as np
+import tensorflow as tf
+from tensorflow.contrib import rnn
+
+
+def _print_success_message():
+    print('Tests Passed')
+
+
+def test_create_lookup_tables(create_lookup_tables):
+    with tf.Graph().as_default():
+        test_text = '''
+        Moe_Szyslak Moe's Tavern Where the elite meet to drink
+        Bart_Simpson Eh yeah hello is Mike there Last name Rotch
+        Moe_Szyslak Hold on I'll check Mike Rotch Mike Rotch Hey has anybody seen Mike Rotch lately
+        Moe_Szyslak Listen you little puke One of these days I'm gonna catch you and I'm gonna carve my name on your back with an ice pick
+        Moe_Szyslak Whats the matter Homer You're not your normal effervescent self
+        Homer_Simpson I got my problems Moe Give me another one
+        Moe_Szyslak Homer hey you should not drink to forget your problems
+        Barney_Gumble Yeah you should only drink to enhance your social skills'''
+
+        test_text = test_text.lower()
+        test_text = test_text.split()
+
+        vocab_to_int, int_to_vocab = create_lookup_tables(test_text)
+
+        # Check types
+        assert isinstance(vocab_to_int, dict),\
+            'vocab_to_int is not a dictionary.'
+        assert isinstance(int_to_vocab, dict),\
+            'int_to_vocab is not a dictionary.'
+
+        # Compare lengths of dicts
+        assert len(vocab_to_int) == len(int_to_vocab),\
+            'Length of vocab_to_int and int_to_vocab don\'t match. ' \
+            'vocab_to_int is length {}. int_to_vocab is length {}'.format(len(vocab_to_int), len(int_to_vocab))
+
+        # Make sure the dicts have the same words
+        vocab_to_int_word_set = set(vocab_to_int.keys())
+        int_to_vocab_word_set = set(int_to_vocab.values())
+
+        assert not (vocab_to_int_word_set - int_to_vocab_word_set),\
+            'vocab_to_int and int_to_vocab don\'t have the same words. ' \
+            '{} found in vocab_to_int, but not in int_to_vocab'.format(vocab_to_int_word_set - int_to_vocab_word_set)
+        assert not (int_to_vocab_word_set - vocab_to_int_word_set),\
+            'vocab_to_int and int_to_vocab don\'t have the same words. ' \
+            '{} found in int_to_vocab, but not in vocab_to_int'.format(int_to_vocab_word_set - vocab_to_int_word_set)
+
+        # Make sure the dicts have the same word ids
+        vocab_to_int_word_id_set = set(vocab_to_int.values())
+        int_to_vocab_word_id_set = set(int_to_vocab.keys())
+
+        assert not (vocab_to_int_word_id_set - int_to_vocab_word_id_set),\
+            'vocab_to_int and int_to_vocab don\'t contain the same word ids. ' \
+            '{} found in vocab_to_int, but not in int_to_vocab'.format(vocab_to_int_word_id_set - int_to_vocab_word_id_set)
+        assert not (int_to_vocab_word_id_set - vocab_to_int_word_id_set),\
+            'vocab_to_int and int_to_vocab don\'t contain the same word ids. ' \
+            '{} found in int_to_vocab, but not in vocab_to_int'.format(int_to_vocab_word_id_set - vocab_to_int_word_id_set)
+
+        # Make sure the dicts make the same lookup
+        mismatches = [(word, id, id, int_to_vocab[id]) for word, id in vocab_to_int.items() if int_to_vocab[id] != word]
+
+        assert not mismatches,\
+            'Found {} mismatch(es). First mismatch: vocab_to_int[{}] = {} and int_to_vocab[{}] = {}'.format(
+                len(mismatches),
+                *mismatches[0])
+
+        assert len(vocab_to_int) > len(set(test_text))/2,\
+            'The length of vocab seems too small. Found a length of {}'.format(len(vocab_to_int))
+
+    _print_success_message()
+
+
+def test_get_batches(get_batches):
+    with tf.Graph().as_default():
+        test_batch_size = 128
+        test_seq_length = 5
+        test_int_text = list(range(1000*test_seq_length))
+        batches = get_batches(test_int_text, test_batch_size, test_seq_length)
+
+        # Check type
+        assert isinstance(batches, np.ndarray),\
+            'Batches is not a Numpy array'
+
+        # Check shape
+        assert batches.shape == (7, 2, 128, 5),\
+            'Batches returned wrong shape. Found {}'.format(batches.shape)
+
+    _print_success_message()
+
+
+def test_tokenize(token_lookup):
+    with tf.Graph().as_default():
+        symbols = set(['.', ',', '"', ';', '!', '?', '(', ')', '--', '\n'])
+        token_dict = token_lookup()
+
+        # Check type
+        assert isinstance(token_dict, dict), \
+            'Returned type is {}.'.format(type(token_dict))
+
+        # Check symbols
+        missing_symbols = symbols - set(token_dict.keys())
+        unknown_symbols = set(token_dict.keys()) - symbols
+
+        assert not missing_symbols, \
+            'Missing symbols: {}'.format(missing_symbols)
+        assert not unknown_symbols, \
+            'Unknown symbols: {}'.format(unknown_symbols)
+
+        # Check values type
+        bad_value_type = [type(val) for val in token_dict.values() if not isinstance(val, str)]
+
+        assert not bad_value_type,\
+            'Found token as {} type.'.format(bad_value_type[0])
+
+        # Check for spaces
+        key_has_spaces = [k for k in token_dict.keys() if ' ' in k]
+        val_has_spaces = [val for val in token_dict.values() if ' ' in val]
+
+        assert not key_has_spaces,\
+            'The key "{}" includes spaces. Remove spaces from keys and values'.format(key_has_spaces[0])
+        assert not val_has_spaces,\
+            'The value "{}" includes spaces. Remove spaces from keys and values'.format(val_has_spaces[0])
+
+        # Check for symbols in values
+        symbol_val = ()
+        for symbol in symbols:
+            for val in token_dict.values():
+                if symbol in val:
+                    symbol_val = (symbol, val)
+
+        assert not symbol_val,\
+            'Don\'t use a symbol that will be replaced in your tokens. Found the symbol {} in value {}'.format(*symbol_val)
+
+    _print_success_message()
+
+
+def test_get_inputs(get_inputs):
+    with tf.Graph().as_default():
+        input_data, targets, lr = get_inputs()
+
+        # Check type
+        assert input_data.op.type == 'Placeholder',\
+            'Input not a Placeholder.'
+        assert targets.op.type == 'Placeholder',\
+            'Targets not a Placeholder.'
+        assert lr.op.type == 'Placeholder',\
+            'Learning Rate not a Placeholder.'
+
+        # Check name
+        assert input_data.name == 'input:0',\
+            'Input has bad name. Found name {}'.format(input_data.name)
+
+        # Check rank
+        input_rank = 0 if input_data.get_shape() == None else len(input_data.get_shape())
+        targets_rank = 0 if targets.get_shape() == None else len(targets.get_shape())
+        lr_rank = 0 if lr.get_shape() == None else len(lr.get_shape())
+
+        assert input_rank == 2,\
+            'Input has wrong rank. Rank {} found.'.format(input_rank)
+        assert targets_rank == 2,\
+            'Targets has wrong rank. Rank {} found.'.format(targets_rank)
+        assert lr_rank == 0,\
+            'Learning Rate has wrong rank. Rank {} found.'.format(lr_rank)
+
+    _print_success_message()
+
+
+def test_get_init_cell(get_init_cell):
+    with tf.Graph().as_default():
+        test_batch_size_ph = tf.placeholder(tf.int32)
+        test_rnn_size = 256
+
+        cell, init_state = get_init_cell(test_batch_size_ph, test_rnn_size)
+
+        # Check type
+        assert isinstance(cell, tf.contrib.rnn.MultiRNNCell),\
+            'Cell is wrong type. Found {} type'.format(type(cell))
+
+        # Check for name attribute
+        assert hasattr(init_state, 'name'),\
+            'Initial state doesn\'t have the "name" attribute. Try using `tf.identity` to set the name.'
+
+        # Check name
+        assert init_state.name == 'initial_state:0',\
+            'Initial state doesn\'t have the correct name. Found the name {}'.format(init_state.name)
+
+    _print_success_message()
+
+
+def test_get_embed(get_embed):
+    with tf.Graph().as_default():
+        embed_shape = [50, 5, 256]
+        test_input_data = tf.placeholder(tf.int32, embed_shape[:2])
+        test_vocab_size = 27
+        test_embed_dim = embed_shape[2]
+
+        embed = get_embed(test_input_data, test_vocab_size, test_embed_dim)
+
+        # Check shape
+        assert embed.shape == embed_shape,\
+            'Wrong shape. Found shape {}'.format(embed.shape)
+
+    _print_success_message()
+
+
+def test_build_rnn(build_rnn):
+    with tf.Graph().as_default():
+        test_rnn_size = 256
+        test_rnn_layer_size = 2
+        test_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(test_rnn_size)] * test_rnn_layer_size)
+
+        test_inputs = tf.placeholder(tf.float32, [None, None, test_rnn_size])
+        outputs, final_state = build_rnn(test_cell, test_inputs)
+
+        # Check name
+        assert hasattr(final_state, 'name'),\
+            'Final state doesn\'t have the "name" attribute. Try using `tf.identity` to set the name.'
+        assert final_state.name == 'final_state:0',\
+            'Final state doesn\'t have the correct name. Found the name {}'.format(final_state.name)
+
+        # Check shape
+        assert outputs.get_shape().as_list() == [None, None, test_rnn_size],\
+            'Outputs has wrong shape. Found shape {}'.format(outputs.get_shape())
+        assert final_state.get_shape().as_list() == [test_rnn_layer_size, 2, None, test_rnn_size],\
+            'Final state wrong shape. Found shape {}'.format(final_state.get_shape())
+
+    _print_success_message()
+
+
+def test_build_nn(build_nn):
+    with tf.Graph().as_default():
+        test_input_data_shape = [128, 5]
+        test_input_data = tf.placeholder(tf.int32, test_input_data_shape)
+        test_rnn_size = 256
+        test_rnn_layer_size = 2
+        test_vocab_size = 27
+        test_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(test_rnn_size)] * test_rnn_layer_size)
+
+        logits, final_state = build_nn(test_cell, test_rnn_size, test_input_data, test_vocab_size)
+
+        # Check name
+        assert hasattr(final_state, 'name'), \
+            'Final state doesn\'t have the "name" attribute. Are you using build_rnn?'
+        assert final_state.name == 'final_state:0', \
+            'Final state doesn\'t have the correct name. Found the name {}. Are you using build_rnn?'.format(final_state.name)
+
+        # Check shape
+        assert logits.get_shape().as_list() == test_input_data_shape + [test_vocab_size], \
+            'Outputs has wrong shape. Found shape {}'.format(logits.get_shape())
+        assert final_state.get_shape().as_list() == [test_rnn_layer_size, 2, None, test_rnn_size], \
+            'Final state wrong shape. Found shape {}'.format(final_state.get_shape())
+
+    _print_success_message()
+
+
+def test_get_tensors(get_tensors):
+    test_graph = tf.Graph()
+    with test_graph.as_default():
+        test_input = tf.placeholder(tf.int32, name='input')
+        test_initial_state = tf.placeholder(tf.int32, name='initial_state')
+        test_final_state = tf.placeholder(tf.int32, name='final_state')
+        test_probs = tf.placeholder(tf.float32, name='probs')
+
+    input_text, initial_state, final_state, probs = get_tensors(test_graph)
+
+    # Check correct tensor
+    assert input_text == test_input,\
+        'Test input is wrong tensor'
+    assert initial_state == test_initial_state, \
+        'Initial state is wrong tensor'
+    assert final_state == test_final_state, \
+        'Final state is wrong tensor'
+    assert probs == test_probs, \
+        'Probabilities is wrong tensor'
+
+    _print_success_message()
+
+
+def test_pick_word(pick_word):
+    with tf.Graph().as_default():
+        test_probabilities = np.array([0.1, 0.8, 0.05, 0.05])
+        test_int_to_vocab = {word_i: word for word_i, word in enumerate(['this', 'is', 'a', 'test'])}
+
+        pred_word = pick_word(test_probabilities, test_int_to_vocab)
+
+        # Check type
+        assert isinstance(pred_word, str),\
+            'Predicted word is wrong type. Found {} type.'.format(type(pred_word))
+
+        # Check word is from vocab
+        assert pred_word in test_int_to_vocab.values(),\
+            'Predicted word not found in int_to_vocab.'
+
+    _print_success_message()
\ No newline at end of file