{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "odps.Schema {\n",
      "  login                     string        \n",
      "  created_at                date          \n",
      "  database_id               int64         \n",
      "  location                  string        \n",
      "  company                   string        \n",
      "  bio                       string        \n",
      "  is_employee               boolean       \n",
      "  email                     string        \n",
      "  infoname                  string        \n",
      "  followers                 string        \n",
      "  following                 string        \n",
      "  time                      date          \n",
      "  name                      string        \n",
      "  lastupdatedat             date          \n",
      "  nextupdateat              date          \n",
      "}\n",
      "\n",
      "odps.Schema {\n",
      "  id                                                                          string                  \n",
      "  type                                                                        string                  \n",
      "  action                                                                      string                  \n",
      "  actor_id                                                                    int64                   \n",
      "  actor_login                                                                 string                  \n",
      "  repo_id                                                                     int64                   \n",
      "  repo_name                                                                   string                  \n",
      "  org_id                                                                      int64                   \n",
      "  org_login                                                                   string                  \n",
      "  created_at                                                                  datetime                \n",
      "  issue_id                                                                    int64                   \n",
      "  issue_number                                                                int32                   \n",
      "  issue_title                                                                 string                  \n",
      "  issue_body                                                                  string                  \n",
      "  issue_labels_name                                                           list<string>            \n",
      "  issue_labels_color                                                          list<string>            \n",
      "  issue_labels_default                                                        list<string>            \n",
      "  issue_labels_description                                                    list<string>            \n",
      "  issue_author_id                                                             int64                   \n",
      "  issue_author_login                                                          string                  \n",
      "  issue_author_type                                                           string                  \n",
      "  issue_author_association                                                    string                  \n",
      "  issue_assignee_id                                                           int64                   \n",
      "  issue_assignee_login                                                        string                  \n",
      "  issue_assignees_id                                                          list<string>            \n",
      "  issue_assignees_login                                                       list<string>            \n",
      "  issue_created_at                                                            datetime                \n",
      "  issue_updated_at                                                            datetime                \n",
      "  issue_comments                                                              int16                   \n",
      "  issue_closed_at                                                             datetime                \n",
      "  issue_comment_id                                                            int64                   \n",
      "  issue_comment_body                                                          string                  \n",
      "  issue_comment_created_at                                                    datetime                \n",
      "  issue_comment_updated_at                                                    datetime                \n",
      "  issue_comment_author_association                                            string                  \n",
      "  issue_comment_author_id                                                     int64                   \n",
      "  issue_comment_author_login                                                  string                  \n",
      "  issue_comment_author_type                                                   string                  \n",
      "  pull_commits                                                                int16                   \n",
      "  pull_additions                                                              int16                   \n",
      "  pull_deletions                                                              int16                   \n",
      "  pull_changed_files                                                          int32                   \n",
      "  pull_merged                                                                 int8                    \n",
      "  pull_merge_commit_sha                                                       string                  \n",
      "  pull_merged_at                                                              datetime                \n",
      "  pull_merged_by_id                                                           int64                   \n",
      "  pull_merged_by_login                                                        string                  \n",
      "  pull_merged_by_type                                                         string                  \n",
      "  pull_requested_reviewer_id                                                  int64                   \n",
      "  pull_requested_reviewer_login                                               string                  \n",
      "  pull_requested_reviewer_type                                                string                  \n",
      "  pull_review_comments                                                        int16                   \n",
      "  repo_description                                                            string                  \n",
      "  repo_size                                                                   int32                   \n",
      "  repo_stargazers_count                                                       int32                   \n",
      "  repo_forks_count                                                            int32                   \n",
      "  repo_language                                                               string                  \n",
      "  repo_has_issues                                                             int8                    \n",
      "  repo_has_projects                                                           int8                    \n",
      "  repo_has_downloads                                                          int8                    \n",
      "  repo_has_wiki                                                               int8                    \n",
      "  repo_has_pages                                                              int8                    \n",
      "  repo_license                                                                string                  \n",
      "  repo_default_branch                                                         string                  \n",
      "  repo_created_at                                                             datetime                \n",
      "  repo_updated_at                                                             datetime                \n",
      "  repo_pushed_at                                                              datetime                \n",
      "  pull_review_id                                                              int64                   \n",
      "  pull_review_comment_id                                                      int64                   \n",
      "  pull_review_comment_path                                                    string                  \n",
      "  pull_review_comment_position                                                string                  \n",
      "  pull_review_comment_author_id                                               int64                   \n",
      "  pull_review_comment_author_login                                            string                  \n",
      "  pull_review_comment_author_type                                             string                  \n",
      "  pull_review_comment_author_association                                      string                  \n",
      "  pull_review_comment_body                                                    string                  \n",
      "  pull_review_comment_created_at                                              datetime                \n",
      "  pull_review_comment_updated_at                                              datetime                \n",
      "  push_id                                                                     int64                   \n",
      "  push_size                                                                   int32                   \n",
      "  push_distinct_size                                                          int32                   \n",
      "  push_ref                                                                    string                  \n",
      "  push_head                                                                   string                  \n",
      "  push_before                                                                 string                  \n",
      "  push_commits_name                                                           list<string>            \n",
      "  push_commits_email                                                          list<string>            \n",
      "  push_commits_message                                                        list<string>            \n",
      "  fork_forkee_id                                                              int64                   \n",
      "  fork_forkee_full_name                                                       string                  \n",
      "  fork_forkee_owner_id                                                        int64                   \n",
      "  fork_forkee_owner_login                                                     string                  \n",
      "  fork_forkee_owner_type                                                      string                  \n",
      "  delete_ref                                                                  string                  \n",
      "  delete_ref_type                                                             string                  \n",
      "  delete_pusher_type                                                          string                  \n",
      "  create_ref                                                                  string                  \n",
      "  create_ref_type                                                             string                  \n",
      "  create_master_branch                                                        string                  \n",
      "  create_description                                                          string                  \n",
      "  create_pusher_type                                                          string                  \n",
      "  gollum_pages_page_name                                                      list<string>            \n",
      "  gollum_pages_title                                                          list<string>            \n",
      "  gollum_pages_action                                                         list<string>            \n",
      "  member_login                                                                string                  \n",
      "  member_type                                                                 string                  \n",
      "  member_id                                                                   int64                   \n",
      "  release_id                                                                  int64                   \n",
      "  release_tag_name                                                            string                  \n",
      "  release_target_commitish                                                    string                  \n",
      "  release_name                                                                string                  \n",
      "  release_draft                                                               int8                    \n",
      "  release_author_id                                                           int64                   \n",
      "  release_author_login                                                        string                  \n",
      "  release_author_type                                                         string                  \n",
      "  release_prerelease                                                          int8                    \n",
      "  release_created_at                                                          datetime                \n",
      "  release_published_at                                                        datetime                \n",
      "  release_body                                                                string                  \n",
      "  release_assets_name                                                         list<string>            \n",
      "  release_assets_uploader_login                                               list<string>            \n",
      "  release_assets_uploader_id                                                  list<string>            \n",
      "  release_assets_content_type                                                 list<string>            \n",
      "  release_assets_state                                                        list<string>            \n",
      "  release_assets_size                                                         list<string>            \n",
      "  release_assets_download_count                                               list<string>            \n",
      "  commit_comment_id                                                           int64                   \n",
      "  commit_comment_author_id                                                    int64                   \n",
      "  commit_comment_author_login                                                 string                  \n",
      "  commit_comment_author_type                                                  string                  \n",
      "  commit_comment_author_association                                           string                  \n",
      "  commit_comment_body                                                         string                  \n",
      "  commit_comment_path                                                         string                  \n",
      "  commit_comment_position                                                     string                  \n",
      "  commit_comment_line                                                         string                  \n",
      "  commit_comment_created_at                                                   datetime                \n",
      "  commit_comment_updated_at                                                   datetime                \n",
      "  pt                                                                          string                  \n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from odps import ODPS\n",
    "from odps import options\n",
    "from odps.df import DataFrame\n",
    "\n",
    "# Show every row when a pandas object is displayed.\n",
    "pd.set_option('display.max_rows', None)\n",
    "\n",
    "# Credentials come from the environment so that no secret is stored in the\n",
    "# notebook. Export ODPS_ACCESS_ID and ODPS_SECRET_ACCESS_KEY before starting\n",
    "# the kernel.\n",
    "# NOTE(review): an AccessKey pair used to be hardcoded in this cell; treat it\n",
    "# as compromised and rotate it.\n",
    "ACCESS_ID = os.environ.get('ODPS_ACCESS_ID')\n",
    "SECRET_ACCESS_KEY = os.environ.get('ODPS_SECRET_ACCESS_KEY')\n",
    "if not ACCESS_ID or not SECRET_ACCESS_KEY:\n",
    "    raise RuntimeError(\n",
    "        'Set the ODPS_ACCESS_ID and ODPS_SECRET_ACCESS_KEY environment '\n",
    "        'variables before running this notebook.')\n",
    "\n",
    "ODPS_PROJECT = 'OpenDigger_prod_dev'\n",
    "ODPS_ENDPOINT = 'http://service.cn-shanghai.maxcompute.aliyun.com/api'\n",
    "\n",
    "# Entry object used by every later cell to run SQL and fetch tables.\n",
    "o = ODPS(ACCESS_ID, SECRET_ACCESS_KEY,\n",
    "         project=ODPS_PROJECT, endpoint=ODPS_ENDPOINT)\n",
    "options.tunnel.limit_instance_tunnel = False\n",
    "\n",
    "# Print the column schema of the two source tables.\n",
    "users = DataFrame(o.get_table('ods_github_users'))\n",
    "print(users.dtypes)\n",
    "\n",
    "github_log = DataFrame(o.get_table('ods_github_log'))\n",
    "print(github_log.dtypes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count events per (type, repo_id) in the 20151001 partition and write one\n",
    "# line per group to count.txt inside the data directory.\n",
    "# The aggregate is aliased so the column is read by an explicit name.\n",
    "sql = '''\n",
    "    select type, count(repo_id) as event_count, repo_id from ods_github_log\n",
    "    where pt='20151001'\n",
    "    and type in ('PullRequestEvent','WatchEvent','ForkEvent','IssueCommentEvent')\n",
    "    group by type, repo_id;\n",
    "'''\n",
    "\n",
    "result = o.execute_sql(sql, hints={'odps.sql.allow.fullscan': 'true', 'odps.sql.submit.mode': 'script'})\n",
    "\n",
    "# Build the path with os.path.join instead of a backslash literal: the old\n",
    "# literal relied on an unrecognised escape sequence and only pointed inside\n",
    "# the data directory on Windows.\n",
    "os.makedirs('data', exist_ok=True)\n",
    "with open(os.path.join('data', 'count.txt'), 'w') as f:\n",
    "    with result.open_reader() as reader:\n",
    "        for record in reader:\n",
    "            # Fields are passed straight to format() so the built-in type()\n",
    "            # is not rebound at notebook scope.\n",
    "            f.write('type: {type}, repo_id: {repo_id}, count: {count}\\n'.format(\n",
    "                type=record['type'],\n",
    "                repo_id=record['repo_id'],\n",
    "                count=record['event_count']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For each event type, collect the repositories every actor touched in the\n",
    "# 20151001 partition, then write the actors with at least two repositories\n",
    "# to one text file per event type inside the data directory.\n",
    "EVENT_TYPES = ('PullRequestEvent', 'WatchEvent', 'ForkEvent', 'IssueCommentEvent')\n",
    "\n",
    "# graph_dict[event_type][actor_id] -> list of repo ids (as strings)\n",
    "graph_dict = {event_type: {} for event_type in EVENT_TYPES}\n",
    "\n",
    "# The group by collapses duplicates, so each (type, repo_id, actor_id)\n",
    "# combination is returned once.\n",
    "sql = '''\n",
    "    select type, repo_id, actor_id\n",
    "    from ods_github_log\n",
    "    where pt='20151001'\n",
    "        and type in ('PullRequestEvent','WatchEvent','ForkEvent','IssueCommentEvent')\n",
    "    group by type, repo_id, actor_id;\n",
    "'''\n",
    "result = o.execute_sql(sql, hints={'odps.sql.allow.fullscan': 'true', 'odps.sql.submit.mode': 'script'})\n",
    "with result.open_reader() as reader:\n",
    "    for record in reader:\n",
    "        # Index by the record field directly so the built-in type() is not\n",
    "        # rebound at notebook scope.\n",
    "        repos_by_actor = graph_dict[record['type']]\n",
    "        repos_by_actor.setdefault(record['actor_id'], []).append(str(record['repo_id']))\n",
    "\n",
    "# One loop replaces four copy-pasted writer blocks. os.path.join replaces the\n",
    "# backslash literals, which relied on unrecognised escape sequences and only\n",
    "# pointed inside the data directory on Windows.\n",
    "os.makedirs('data', exist_ok=True)\n",
    "for event_type in EVENT_TYPES:\n",
    "    with open(os.path.join('data', event_type + '.txt'), 'w') as f:\n",
    "        for actor_id, repo_ids in graph_dict[event_type].items():\n",
    "            # Actors linked to a single repository are skipped.\n",
    "            if len(repo_ids) < 2:\n",
    "                continue\n",
    "            f.write('actor_id: {actor_id}\\nrepo_id: {list}\\n'.format(actor_id=actor_id, list=' '.join(repo_ids)))"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "caac794e4b8e34bcc9a4d9e1a06492e263031294735d822cbf2db7854bb6c6da"
  },
  "kernelspec": {
   "display_name": "Python 3.10.4 64-bit (windows store)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}