1204

hace 3 años · 77b3d7834a
--- a/Ex2.2.ipynb
+++ b/Ex2.2.ipynb
--- a/第4节/K-Means-Viral
+++ b/第4节/K-Means-Viral
--- a/第4节/KMeans.ipynb
+++ b/第4节/KMeans.ipynb
--- a/第4节/KNN.ipynb
+++ b/第4节/KNN.ipynb
--- a/第4节/LinearRegression-Employee
+++ b/第4节/LinearRegression-Employee
--- a/Regression/Test_Employee.csv
+++ b/Regression/Test_Employee.csv
--- a/Regression/Train_Employee.csv
+++ b/Regression/Train_Employee.csv
--- a/第4节/Viral.csv
+++ b/第4节/Viral.csv
--- a/第5节/ImageClassify.ipynb
+++ b/第5节/ImageClassify.ipynb
--- a/第5节/Matplotlib.ipynb
+++ b/第5节/Matplotlib.ipynb
--- a/第6节/SMSSpamCollection.csv
+++ b/第6节/SMSSpamCollection.csv
@ -0,0 +1,30 @@
 ham,"Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
 ham,Ok lar... Joking wif u oni...
 ham,Even my brother is not like to speak with me. They treat me like aids patent.
 ham,As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune
 spam,"England v Macedonia - dont miss the goalsteam news. Txt ur national team to 87077 eg ENGLAND to 87077 Try:WALES, SCOTLAND 4txt.20 POBOXox36504W45WQ 16+"
 spam,"Congrats! 1 year special cinema pass for 2 is yours. call 09061209465 now! C Suprman V, Matrix3, StarWars3, etc all 4 FREE! bx420-ip4-5we. 150pm. Dont miss out! "
 ham,"Sorry, I'll call later in meeting."
 ham,Tell where you reached
 ham,Yes..gauti and sehwag out of odi series.
 spam,Please call our customer service representative on 0800 169 6031 between 10am-9pm as you have WON a guaranteed 1000 cash or 5000 prize!
 ham,Havent planning to buy later. I check already lido only got 530 show in e afternoon. U finish work already?
 spam,"Your free ringtone is waiting to be collected. Simply text the password ""MIX"" to 85069 to verify. Get Usher and Britney. FML, PO Box 5249, MK17 92H. 450Ppw 16"
 ham,How would my ip address test that considering my computer isn't a minecraft server
 ham,I know! Grumpy old people. My mom was like you better not be lying. Then again I am always the one to play jokes...
 ham,Dont worry. I guess he's busy.
 ham,What is the plural of the noun research?
 ham,Going for dinner.msg you after.
 ham,I'm ok wif it cos i like 2 try new things. But i scared u dun like mah. Cos u said not too loud.
 spam,GENT! We are trying to contact you. Last weekends draw shows that you won a 1000 prize GUARANTEED. Call 09064012160. Claim Code K52. Valid 12hrs only. 150ppm
 ham,K..k:)how much does it cost?
 ham,I'm home.
 ham,"Dear, will call Tmorrow.pls accomodate."
 ham,First answer my question.
 spam,Sunshine Quiz Wkly Q! Win a top Sony DVD player if u know which country the Algarve is in? Txt ansr to 82277. 1.50 SP:Tyrone
 ham,"K, text me when you're on the way"
 ham,"Sir, Waiting for your mail."
 spam,Customer service annoncement. You have a New Years delivery waiting for you. Please call 07046744435 now to arrange delivery
 spam,You are a winner U have been specially selected 2 receive 1000 cash or a 4* holiday (flights inc) speak to a live operator 2 claim 0871277810810
 ham,Keep yourself safe for me because I need you and I miss you already and I envy everyone that see's you in real life
 spam,URGENT! We are trying to contact you. Last weekends draw shows that you have won a 900 prize GUARANTEED. Call 09061701939. Claim code S89. Valid 12hrs only
--- a/第6节/SMSSpamCollection.ipynb
+++ b/第6节/SMSSpamCollection.ipynb
@ -0,0 +1,214 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<bound method NDFrame.head of        0                                                  1\n",
      "0    ham  Go until jurong point, crazy.. Available only ...\n",
      "1    ham                      Ok lar... Joking wif u oni...\n",
      "2    ham  Even my brother is not like to speak with me. ...\n",
      "3    ham  As per your request 'Melle Melle (Oru Minnamin...\n",
      "4   spam  England v Macedonia - dont miss the goalsteam ...\n",
      "5   spam  Congrats! 1 year special cinema pass for 2 is ...\n",
      "6    ham                 Sorry, I'll call later in meeting.\n",
      "7    ham                             Tell where you reached\n",
      "8    ham           Yes..gauti and sehwag out of odi series.\n",
      "9   spam  Please call our customer service representativ...\n",
      "10   ham  Havent planning to buy later. I check already ...\n",
      "11  spam  Your free ringtone is waiting to be collected....\n",
      "12   ham  How would my ip address test that considering ...\n",
      "13   ham  I know! Grumpy old people. My mom was like you...\n",
      "14   ham                     Dont worry. I guess he's busy.\n",
      "15   ham           What is the plural of the noun research?\n",
      "16   ham                    Going for dinner.msg you after.\n",
      "17   ham  I'm ok wif it cos i like 2 try new things. But...\n",
      "18  spam  GENT! We are trying to contact you. Last weeke...\n",
      "19   ham                       K..k:)how much does it cost?\n",
      "20   ham                                          I'm home.\n",
      "21   ham            Dear, will call Tmorrow.pls accomodate.\n",
      "22   ham                          First answer my question.\n",
      "23  spam  Sunshine Quiz Wkly Q! Win a top Sony DVD playe...\n",
      "24   ham                  K, text me when you're on the way\n",
      "25   ham                        Sir, Waiting for your mail.\n",
      "26  spam  Customer service annoncement. You have a New Y...\n",
      "27  spam  You are a winner U have been specially selecte...\n",
      "28   ham  Keep yourself safe for me because I need you a...\n",
      "29  spam  URGENT! We are trying to contact you. Last wee...>\n",
      "垃圾邮件个数：9\n",
      "正常邮件个数：21\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.linear_model.logistic import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split,cross_val_score\n",
    "df = pd.read_csv('SMSSpamCollection.csv',header=None,encoding='utf-8')\n",
    "print(df.head)\n",
    "print(\"垃圾邮件个数：%s\" % df[df[0]=='spam'][0].count())\n",
    "print(\"正常邮件个数：%s\" % df[df[0]=='ham'][0].count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df[1].values\n",
    "y = df[0].values\n",
    "X_train_raw,X_test_raw,y_train,y_test=train_test_split(X,y)\n",
    "vectorizer = TfidfVectorizer()\n",
    "X_train = vectorizer.fit_transform(X_train_raw)\n",
    "X_test = vectorizer.transform(X_test_raw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "预测为 ham ,信件为 Sunshine Quiz Wkly Q! Win a top Sony DVD player if u know which country the Algarve is in? Txt ansr to 82277. 1.50 SP:Tyrone\n",
      "预测为 ham ,信件为 As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune\n",
      "预测为 ham ,信件为 GENT! We are trying to contact you. Last weekends draw shows that you won a 1000 prize GUARANTEED. Call 09064012160. Claim Code K52. Valid 12hrs only. 150ppm\n",
      "预测为 ham ,信件为 Customer service annoncement. You have a New Years delivery waiting for you. Please call 07046744435 now to arrange delivery\n",
      "预测为 ham ,信件为 Yes..gauti and sehwag out of odi series.\n"
     ]
    }
   ],
   "source": [
    "LR = LogisticRegression()\n",
    "LR.fit(X_train,y_train)\n",
    "predictions = LR.predict(X_test)\n",
    "for i,prediction in enumerate(predictions[:5]):\n",
    "    print(\"预测为 %s ,信件为 %s\" % (prediction,X_test_raw[i]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             precision    recall  f1-score   support\n",
      "\n",
      "        ham       0.50      1.00      0.67         4\n",
      "       spam       0.00      0.00      0.00         4\n",
      "\n",
      "avg / total       0.25      0.50      0.33         8\n",
      "\n",
      "准确率为:  [0.8  0.8  0.75 0.75 0.75]\n",
      "平均准确率为:  0.77\n",
      "平均精准率为:  0.0\n",
      "平均召回率为:  0.0\n",
      "平均F1值为:  0.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "C:\\Users\\52257\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "print(classification_report(y_test,predictions))\n",
    "\n",
    "from sklearn.metrics import roc_curve,auc\n",
    "# 准确率\n",
    "scores =  cross_val_score(LR,X_train,y_train,cv=5)\n",
    "print(\"准确率为: \",scores)\n",
    "print(\"平均准确率为: \",np.mean(scores))\n",
    "\n",
    "# 有时必须要将标签转为数值\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "class_le = LabelEncoder()\n",
    "y_train_n = class_le.fit_transform(y_train)\n",
    "y_test_n = class_le.fit_transform(y_test)\n",
    "\n",
    "# 精准率\n",
    "precision =  cross_val_score(LR,X_train,y_train_n,cv=5,scoring='precision')\n",
    "print(\"平均精准率为: \",np.mean(precision))\n",
    "# 召回率\n",
    "recall =  cross_val_score(LR,X_train,y_train_n,cv=5,scoring='recall')\n",
    "print(\"平均召回率为: \",np.mean(recall))   \n",
    "# F1值\n",
    "f1 =  cross_val_score(LR,X_train,y_train_n,cv=5,scoring='f1')\n",
    "print(\"平均F1值为: \",np.mean(f1))  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
     "#### 警报（UndefinedMetricWarning）通常是因为模型没有把任何样本预测为某个标签（此处为 spam），导致该标签的精准率和 F1 值无法计算而被置为 0。尝试向数据集中加入更多的数据，再次运行，查看结果。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/第6节/SMSSpamCollection.txt
+++ b/第6节/SMSSpamCollection.txt