10185101272
/
anime-recommend-wsy

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Matrix factorization with Surprise\n",
    "\n",
    "This notebook implements a basic matrix-factorization rating predictor trained with SGD,\n",
    "then compares it with two algorithms shipped with Surprise (`KNNBasic` and `SVD`).\n",
    "All three are scored by RMSE on the same cross-validation folds of the MovieLens-100k dataset.\n",
    "\n",
    "**Requirement:** `scikit-surprise` must be installed in the kernel's environment\n",
    "(`pip install scikit-surprise`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import surprise  # run 'pip install scikit-surprise' to install surprise\n",
    "from surprise.model_selection import KFold, cross_validate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 0     # seeds the fold assignment and the random initializations\n",
    "N_FOLDS = 2  # number of cross-validation folds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A basic matrix factorization algorithm\n",
    "\n",
    "The rating of user $u$ for item $i$ is predicted as $\\hat{r}_{ui} = p_u \\cdot q_i$.\n",
    "The vectors $p_u$ and $q_i$ are learned by SGD on the prediction error."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MatrixFacto(surprise.AlgoBase):\n",
    "    '''A basic rating prediction algorithm based on matrix factorization.\n",
    "\n",
    "    Args:\n",
    "        learning_rate: step size of the SGD updates.\n",
    "        n_epochs: number of SGD passes over the training ratings.\n",
    "        n_factors: dimension of the factor vectors p_u and q_i.\n",
    "        random_state: optional integer seed for the factor initialization.\n",
    "            When None, numpy's global random state is used.\n",
    "    '''\n",
    "\n",
    "    def __init__(self, learning_rate, n_epochs, n_factors, random_state=None):\n",
    "\n",
    "        # The Surprise guide for custom algorithms asks subclasses to call\n",
    "        # the base constructor first.\n",
    "        surprise.AlgoBase.__init__(self)\n",
    "\n",
    "        self.lr = learning_rate  # learning rate for SGD\n",
    "        self.n_epochs = n_epochs  # number of iterations of SGD\n",
    "        self.n_factors = n_factors  # number of factors\n",
    "        self.random_state = random_state  # seed for the initial factors\n",
    "\n",
    "    def fit(self, trainset):\n",
    "        '''Learn the vectors p_u and q_i with SGD and return self.'''\n",
    "\n",
    "        # The base fit() records the trainset as self.trainset\n",
    "        # (see the Surprise guide for custom algorithms).\n",
    "        surprise.AlgoBase.fit(self, trainset)\n",
    "\n",
    "        print('Fitting data with SGD...')\n",
    "\n",
    "        # Use a private generator when a seed was given, else the global one.\n",
    "        if self.random_state is None:\n",
    "            rng = np.random\n",
    "        else:\n",
    "            rng = np.random.RandomState(self.random_state)\n",
    "\n",
    "        # Randomly initialize the user and item factors.\n",
    "        p = rng.normal(0, .1, (trainset.n_users, self.n_factors))\n",
    "        q = rng.normal(0, .1, (trainset.n_items, self.n_factors))\n",
    "\n",
    "        # SGD procedure\n",
    "        for _ in range(self.n_epochs):\n",
    "            for u, i, r_ui in trainset.all_ratings():\n",
    "                err = r_ui - np.dot(p[u], q[i])\n",
    "                # Compute both steps from the current vectors before applying\n",
    "                # either one, so that q_i is updated with the previous p_u.\n",
    "                step_p = self.lr * err * q[i]\n",
    "                step_q = self.lr * err * p[u]\n",
    "                p[u] += step_p\n",
    "                q[i] += step_q\n",
    "\n",
    "        self.p, self.q = p, q\n",
    "\n",
    "        return self\n",
    "\n",
    "    def estimate(self, u, i):\n",
    "        '''Return the estimated rating of user u for item i.'''\n",
    "\n",
    "        # return scalar product between p_u and q_i if user and item are known,\n",
    "        # else return the average of all ratings\n",
    "        if self.trainset.knows_user(u) and self.trainset.knows_item(i):\n",
    "            return np.dot(self.p[u], self.q[i])\n",
    "        else:\n",
    "            return self.trainset.global_mean"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data loading. We'll use the movielens dataset (https://grouplens.org/datasets/movielens/100k/)\n",
    "# it will be downloaded automatically.\n",
    "data = surprise.Dataset.load_builtin('ml-100k')\n",
    "\n",
    "# A seeded splitter yields the same folds every time it is used, so every\n",
    "# algorithm below is evaluated on identical train/test splits.\n",
    "folds = KFold(n_splits=N_FOLDS, random_state=SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate\n",
    "\n",
    "Each algorithm is cross-validated on the same folds and scored by RMSE."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mf_algo = MatrixFacto(learning_rate=.01, n_epochs=10, n_factors=10, random_state=SEED)\n",
    "cross_validate(mf_algo, data, measures=['RMSE'], cv=folds, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# try a neighborhood-based algorithm (on the same data)\n",
    "knn_algo = surprise.KNNBasic()\n",
    "cross_validate(knn_algo, data, measures=['RMSE'], cv=folds, verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# try a more sophisticated matrix factorization algorithm (on the same data)\n",
    "svd_algo = surprise.SVD(random_state=SEED)\n",
    "cross_validate(svd_algo, data, measures=['RMSE'], cv=folds, verbose=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results from an earlier run\n",
    "\n",
    "The figures below were recorded by a previous, unseeded run of this notebook that used\n",
    "the older `surprise.evaluate` API (2 folds). Re-run the cells above to refresh them;\n",
    "the new numbers will differ slightly.\n",
    "\n",
    "| Algorithm | Fold 1 RMSE | Fold 2 RMSE | Mean RMSE |\n",
    "|---|---|---|---|\n",
    "| `MatrixFacto` | 0.9826 | 0.9873 | 0.9849 |\n",
    "| `KNNBasic` | 1.0101 | 0.9982 | 1.0042 |\n",
    "| `SVD` | 0.9604 | 0.9538 | 0.9571 |"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}