commit 632b85a6b76c4af8e9b03f6eff3ddd616ec7534d
Author: spike
Date:   Wed Feb 22 00:17:04 2017 +0100

    initial commit

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..33a1691
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+
+tensorflow==1.0.0
+tflearn==0.3
diff --git a/Sentiment Analysis with TFlearn - LSTM - Word2Vec.ipynb b/Sentiment Analysis with TFlearn - LSTM - Word2Vec.ipynb
new file mode 100644
index 0000000..4d762aa
--- /dev/null
+++ b/Sentiment Analysis with TFlearn - LSTM - Word2Vec.ipynb
@@ -0,0 +1,154 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from __future__ import division, print_function, absolute_import\n",
+    "\n",
+    "import tflearn\n",
+    "from tflearn.data_utils import to_categorical, pad_sequences\n",
+    "from tflearn.datasets import imdb\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train, valid, test = imdb.load_data(path='imdb.pkl', n_words=10000, valid_portion=0.1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "trainX, trainY = train\n",
+    "validX, validY = valid\n",
+    "testX, testY = test\n",
+    "\n",
+    "# Test set: 25% of the full test set\n",
+    "test_len = int(0.25*len(testX))\n",
+    "testX = testX[:test_len]\n",
+    "testY = testY[:test_len]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Data preprocessing (results go to new *_pad / *_cat names so this cell is safe to re-run)\n",
+    "# Sequence padding to a fixed length of 100, using 0 as the fill value\n",
+    "trainX_pad = pad_sequences(trainX, maxlen=100, value=0.)\n",
+    "validX_pad = pad_sequences(validX, maxlen=100, value=0.)\n",
+    "testX_pad = pad_sequences(testX, maxlen=100, value=0.)\n",
+    "\n",
+    "# Convert labels to binary vectors\n",
+    "trainY_cat = to_categorical(trainY, nb_classes=2)\n",
+    "validY_cat = to_categorical(validY, nb_classes=2)\n",
+    "testY_cat = to_categorical(testY, nb_classes=2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Network building (100 = padded sequence length, 10000 = n_words used when loading the data)\n",
+    "net = tflearn.input_data([None, 100])\n",
+    "net = tflearn.embedding(net, input_dim=10000, output_dim=128)\n",
+    "net = tflearn.lstm(net, 128, dropout=0.8)\n",
+    "net = tflearn.fully_connected(net, 2, activation='softmax')\n",
+    "net = tflearn.regression(net, optimizer='adam', learning_rate=0.001, loss='categorical_crossentropy')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training Step: 7039 | total loss: \u001b[1m\u001b[32m0.03469\u001b[0m\u001b[0m | time: 47.844s\n",
+      "| Adam | epoch: 010 | loss: 0.03469 - acc: 0.9900 -- iter: 22496/22500\n",
+      "Training Step: 7040 | total loss: \u001b[1m\u001b[32m0.03141\u001b[0m\u001b[0m | time: 49.158s\n",
+      "| Adam | epoch: 010 | loss: 0.03141 - acc: 0.9910 | val_loss: 0.98614 - val_acc: 0.7904 -- iter: 22500/22500\n",
+      "--\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Training\n",
+    "model = tflearn.DNN(net, tensorboard_verbose=0)\n",
+    "model.fit(trainX_pad, trainY_cat, validation_set=(validX_pad, validY_cat), show_metric=True, batch_size=32)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0.84079999971389774]"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Testing the model (the test arrays were already cut to test_len before padding)\n",
+    "model.evaluate(testX_pad, testY_cat)"
+   ]
+  }
+ ],
+ "metadata": {
+  "hide_input": false,
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}