10185101272
/
anime-recommend-wsy


								{

								 "cells": [

								  {

								   "cell_type": "code",

								   "execution_count": 1,

								   "metadata": {},

								   "outputs": [

								    {

								     "ename": "ModuleNotFoundError",

								     "evalue": "No module named 'surprise'",

								     "output_type": "error",

								     "traceback": [

								      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",

								      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",

								      "\u001b[1;32m<ipython-input-1-002ce27085d1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0msurprise\u001b[0m  \u001b[1;31m# run 'pip install scikit-surprise' to install surprise\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",

								      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'surprise'"

								     ]

								    }

								   ],

								   "source": [

								    "import numpy as np\n",

								    "import surprise  # run 'pip install scikit-surprise' to install surprise"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 3,

								   "metadata": {

								    "collapsed": true

								   },

								   "outputs": [],

								   "source": [

								    "class MatrixFacto(surprise.AlgoBase):\n",
								    "    '''A basic rating prediction algorithm based on matrix factorization.\n",
								    "\n",
								    "    A rating is modelled as the dot product of a user vector p_u and an\n",
								    "    item vector q_i. Both sets of vectors are learned with plain SGD.\n",
								    "    '''\n",
								    "\n",
								    "    def __init__(self, learning_rate, n_epochs, n_factors):\n",
								    "        '''Store the SGD hyper-parameters.\n",
								    "\n",
								    "        learning_rate: step size applied to every SGD update.\n",
								    "        n_epochs: number of passes over the training ratings.\n",
								    "        n_factors: length of each user and item vector.\n",
								    "        '''\n",
								    "\n",
								    "        # Custom algorithms are expected to initialize the base class first.\n",
								    "        surprise.AlgoBase.__init__(self)\n",
								    "\n",
								    "        self.lr = learning_rate  # learning rate for SGD\n",
								    "        self.n_epochs = n_epochs  # number of iterations of SGD\n",
								    "        self.n_factors = n_factors  # number of factors\n",
								    "\n",
								    "    def fit(self, trainset):\n",
								    "        '''Learn the vectors p_u and q_i with SGD and return self.'''\n",
								    "\n",
								    "        # The base-class fit records the trainset as self.trainset,\n",
								    "        # which estimate() reads later.\n",
								    "        surprise.AlgoBase.fit(self, trainset)\n",
								    "\n",
								    "        print('Fitting data with SGD...')\n",
								    "\n",
								    "        # Randomly initialize the user and item factors.\n",
								    "        p = np.random.normal(0, .1, (trainset.n_users, self.n_factors))\n",
								    "        q = np.random.normal(0, .1, (trainset.n_items, self.n_factors))\n",
								    "\n",
								    "        # SGD procedure\n",
								    "        for _ in range(self.n_epochs):\n",
								    "            for u, i, r_ui in trainset.all_ratings():\n",
								    "                err = r_ui - np.dot(p[u], q[i])\n",
								    "                # Compute both steps from the values of p_u and q_i *before*\n",
								    "                # the update, then apply them, so that the step on q_i does\n",
								    "                # not see the already-updated p_u.\n",
								    "                step_p = self.lr * err * q[i]\n",
								    "                step_q = self.lr * err * p[u]\n",
								    "                p[u] += step_p\n",
								    "                q[i] += step_q\n",
								    "\n",
								    "        self.p, self.q = p, q\n",
								    "\n",
								    "        return self\n",
								    "\n",
								    "    def estimate(self, u, i):\n",
								    "        '''Return the estimated rating of user u for item i.'''\n",
								    "\n",
								    "        # Fall back to the average of all ratings when either the user or\n",
								    "        # the item was not part of the trainset.\n",
								    "        known = self.trainset.knows_user(u) and self.trainset.knows_item(i)\n",
								    "        if not known:\n",
								    "            return self.trainset.global_mean\n",
								    "\n",
								    "        # Otherwise the estimate is the scalar product between p_u and q_i.\n",
								    "        return np.dot(self.p[u], self.q[i])"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 11,

								   "metadata": {

								    "collapsed": true

								   },

								   "outputs": [],

								   "source": [

								    "# data loading. We'll use the movielens dataset (https://grouplens.org/datasets/movielens/100k/)\n",
								    "# it will be downloaded automatically.\n",
								    "data = surprise.Dataset.load_builtin('ml-100k')\n",
								    "# Dataset.split() only exists in older Surprise releases. Call it when it is\n",
								    "# there so that fold-aware helpers see 2 folds, and skip it otherwise instead\n",
								    "# of failing with an AttributeError.\n",
								    "if hasattr(data, 'split'):\n",
								    "    data.split(2)  # split data for 2-folds cross validation"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 12,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "Evaluating RMSE of algorithm MatrixFacto.\n",

								      "\n",

								      "------------\n",

								      "Fold 1\n",

								      "Fitting data with SGD...\n",

								      "RMSE: 0.9826\n",

								      "------------\n",

								      "Fold 2\n",

								      "Fitting data with SGD...\n",

								      "RMSE: 0.9873\n",

								      "------------\n",

								      "------------\n",

								      "Mean RMSE: 0.9849\n",

								      "------------\n",

								      "------------\n"

								     ]

								    },

								    {

								     "data": {

								      "text/plain": [

								       "CaseInsensitiveDefaultDict(list,\n",

								       "                           {'rmse': [0.98263312180825368, 0.9872549391926676]})"

								      ]

								     },

								     "execution_count": 12,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "algo = MatrixFacto(learning_rate=.01, n_epochs=10, n_factors=10)\n",
								    "# surprise.evaluate() is no longer available in current Surprise releases;\n",
								    "# cross_validate() is its replacement. cv=2 keeps the 2-fold protocol.\n",
								    "surprise.model_selection.cross_validate(algo, data, measures=['rmse'], cv=2, verbose=True)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 13,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "Evaluating RMSE of algorithm KNNBasic.\n",

								      "\n",

								      "------------\n",

								      "Fold 1\n",

								      "Computing the msd similarity matrix...\n",

								      "Done computing similarity matrix.\n",

								      "RMSE: 1.0101\n",

								      "------------\n",

								      "Fold 2\n",

								      "Computing the msd similarity matrix...\n",

								      "Done computing similarity matrix.\n",

								      "RMSE: 0.9982\n",

								      "------------\n",

								      "------------\n",

								      "Mean RMSE: 1.0042\n",

								      "------------\n",

								      "------------\n"

								     ]

								    },

								    {

								     "data": {

								      "text/plain": [

								       "CaseInsensitiveDefaultDict(list,\n",

								       "                           {'rmse': [1.0101383334175613, 0.99823558896449016]})"

								      ]

								     },

								     "execution_count": 13,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "# try a neighborhood-based algorithm (on the same data)\n",
								    "algo = surprise.KNNBasic()\n",
								    "# surprise.evaluate() is no longer available in current Surprise releases;\n",
								    "# cross_validate() is its replacement. cv=2 keeps the 2-fold protocol.\n",
								    "surprise.model_selection.cross_validate(algo, data, measures=['rmse'], cv=2, verbose=True)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 14,

								   "metadata": {},

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "Evaluating RMSE of algorithm SVD.\n",

								      "\n",

								      "------------\n",

								      "Fold 1\n",

								      "RMSE: 0.9604\n",

								      "------------\n",

								      "Fold 2\n",

								      "RMSE: 0.9538\n",

								      "------------\n",

								      "------------\n",

								      "Mean RMSE: 0.9571\n",

								      "------------\n",

								      "------------\n"

								     ]

								    },

								    {

								     "data": {

								      "text/plain": [

								       "CaseInsensitiveDefaultDict(list,\n",

								       "                           {'rmse': [0.96042083843476056,\n",

								       "                             0.95382688332712151]})"

								      ]

								     },

								     "execution_count": 14,

								     "metadata": {},

								     "output_type": "execute_result"

								    }

								   ],

								   "source": [

								    "# try a more sophisticated matrix factorization algorithm (on the same data)\n",
								    "algo = surprise.SVD()\n",
								    "# surprise.evaluate() is no longer available in current Surprise releases;\n",
								    "# cross_validate() is its replacement. cv=2 keeps the 2-fold protocol.\n",
								    "surprise.model_selection.cross_validate(algo, data, measures=['rmse'], cv=2, verbose=True)"

								   ]

								  }

								 ],

								 "metadata": {

								  "kernelspec": {

								   "display_name": "Python 3",

								   "language": "python",

								   "name": "python3"

								  },

								  "language_info": {

								   "codemirror_mode": {

								    "name": "ipython",

								    "version": 3

								   },

								   "file_extension": ".py",

								   "mimetype": "text/x-python",

								   "name": "python",

								   "nbconvert_exporter": "python",

								   "pygments_lexer": "ipython3",

								   "version": "3.7.6"

								  },

								  "latex_envs": {

								   "LaTeX_envs_menu_present": true,

								   "autoclose": false,

								   "autocomplete": true,

								   "bibliofile": "biblio.bib",

								   "cite_by": "apalike",

								   "current_citInitial": 1,

								   "eqLabelWithNumbers": true,

								   "eqNumInitial": 1,

								   "hotkeys": {

								    "equation": "Ctrl-E",

								    "itemize": "Ctrl-I"

								   },

								   "labels_anchors": false,

								   "latex_user_defs": false,

								   "report_style_numbering": false,

								   "user_envs_cfg": false

								  },

								  "toc": {

								   "base_numbering": 1,

								   "nav_menu": {},

								   "number_sections": true,

								   "sideBar": true,

								   "skip_h1_title": false,

								   "title_cell": "Table of Contents",

								   "title_sidebar": "Contents",

								   "toc_cell": false,

								   "toc_position": {},

								   "toc_section_display": true,

								   "toc_window_display": false

								  }

								 },

								 "nbformat": 4,

								 "nbformat_minor": 2

								}