{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"odps.Schema {\n",
|
|
" login string \n",
|
|
" created_at date \n",
|
|
" database_id int64 \n",
|
|
" location string \n",
|
|
" company string \n",
|
|
" bio string \n",
|
|
" is_employee boolean \n",
|
|
" email string \n",
|
|
" infoname string \n",
|
|
" followers string \n",
|
|
" following string \n",
|
|
" time date \n",
|
|
" name string \n",
|
|
" lastupdatedat date \n",
|
|
" nextupdateat date \n",
|
|
"}\n",
|
|
"\n",
|
|
"odps.Schema {\n",
|
|
" id string \n",
|
|
" type string \n",
|
|
" action string \n",
|
|
" actor_id int64 \n",
|
|
" actor_login string \n",
|
|
" repo_id int64 \n",
|
|
" repo_name string \n",
|
|
" org_id int64 \n",
|
|
" org_login string \n",
|
|
" created_at datetime \n",
|
|
" issue_id int64 \n",
|
|
" issue_number int32 \n",
|
|
" issue_title string \n",
|
|
" issue_body string \n",
|
|
" issue_labels_name list<string> \n",
|
|
" issue_labels_color list<string> \n",
|
|
" issue_labels_default list<string> \n",
|
|
" issue_labels_description list<string> \n",
|
|
" issue_author_id int64 \n",
|
|
" issue_author_login string \n",
|
|
" issue_author_type string \n",
|
|
" issue_author_association string \n",
|
|
" issue_assignee_id int64 \n",
|
|
" issue_assignee_login string \n",
|
|
" issue_assignees_id list<string> \n",
|
|
" issue_assignees_login list<string> \n",
|
|
" issue_created_at datetime \n",
|
|
" issue_updated_at datetime \n",
|
|
" issue_comments int16 \n",
|
|
" issue_closed_at datetime \n",
|
|
" issue_comment_id int64 \n",
|
|
" issue_comment_body string \n",
|
|
" issue_comment_created_at datetime \n",
|
|
" issue_comment_updated_at datetime \n",
|
|
" issue_comment_author_association string \n",
|
|
" issue_comment_author_id int64 \n",
|
|
" issue_comment_author_login string \n",
|
|
" issue_comment_author_type string \n",
|
|
" pull_commits int16 \n",
|
|
" pull_additions int16 \n",
|
|
" pull_deletions int16 \n",
|
|
" pull_changed_files int32 \n",
|
|
" pull_merged int8 \n",
|
|
" pull_merge_commit_sha string \n",
|
|
" pull_merged_at datetime \n",
|
|
" pull_merged_by_id int64 \n",
|
|
" pull_merged_by_login string \n",
|
|
" pull_merged_by_type string \n",
|
|
" pull_requested_reviewer_id int64 \n",
|
|
" pull_requested_reviewer_login string \n",
|
|
" pull_requested_reviewer_type string \n",
|
|
" pull_review_comments int16 \n",
|
|
" repo_description string \n",
|
|
" repo_size int32 \n",
|
|
" repo_stargazers_count int32 \n",
|
|
" repo_forks_count int32 \n",
|
|
" repo_language string \n",
|
|
" repo_has_issues int8 \n",
|
|
" repo_has_projects int8 \n",
|
|
" repo_has_downloads int8 \n",
|
|
" repo_has_wiki int8 \n",
|
|
" repo_has_pages int8 \n",
|
|
" repo_license string \n",
|
|
" repo_default_branch string \n",
|
|
" repo_created_at datetime \n",
|
|
" repo_updated_at datetime \n",
|
|
" repo_pushed_at datetime \n",
|
|
" pull_review_id int64 \n",
|
|
" pull_review_comment_id int64 \n",
|
|
" pull_review_comment_path string \n",
|
|
" pull_review_comment_position string \n",
|
|
" pull_review_comment_author_id int64 \n",
|
|
" pull_review_comment_author_login string \n",
|
|
" pull_review_comment_author_type string \n",
|
|
" pull_review_comment_author_association string \n",
|
|
" pull_review_comment_body string \n",
|
|
" pull_review_comment_created_at datetime \n",
|
|
" pull_review_comment_updated_at datetime \n",
|
|
" push_id int64 \n",
|
|
" push_size int32 \n",
|
|
" push_distinct_size int32 \n",
|
|
" push_ref string \n",
|
|
" push_head string \n",
|
|
" push_before string \n",
|
|
" push_commits_name list<string> \n",
|
|
" push_commits_email list<string> \n",
|
|
" push_commits_message list<string> \n",
|
|
" fork_forkee_id int64 \n",
|
|
" fork_forkee_full_name string \n",
|
|
" fork_forkee_owner_id int64 \n",
|
|
" fork_forkee_owner_login string \n",
|
|
" fork_forkee_owner_type string \n",
|
|
" delete_ref string \n",
|
|
" delete_ref_type string \n",
|
|
" delete_pusher_type string \n",
|
|
" create_ref string \n",
|
|
" create_ref_type string \n",
|
|
" create_master_branch string \n",
|
|
" create_description string \n",
|
|
" create_pusher_type string \n",
|
|
" gollum_pages_page_name list<string> \n",
|
|
" gollum_pages_title list<string> \n",
|
|
" gollum_pages_action list<string> \n",
|
|
" member_login string \n",
|
|
" member_type string \n",
|
|
" member_id int64 \n",
|
|
" release_id int64 \n",
|
|
" release_tag_name string \n",
|
|
" release_target_commitish string \n",
|
|
" release_name string \n",
|
|
" release_draft int8 \n",
|
|
" release_author_id int64 \n",
|
|
" release_author_login string \n",
|
|
" release_author_type string \n",
|
|
" release_prerelease int8 \n",
|
|
" release_created_at datetime \n",
|
|
" release_published_at datetime \n",
|
|
" release_body string \n",
|
|
" release_assets_name list<string> \n",
|
|
" release_assets_uploader_login list<string> \n",
|
|
" release_assets_uploader_id list<string> \n",
|
|
" release_assets_content_type list<string> \n",
|
|
" release_assets_state list<string> \n",
|
|
" release_assets_size list<string> \n",
|
|
" release_assets_download_count list<string> \n",
|
|
" commit_comment_id int64 \n",
|
|
" commit_comment_author_id int64 \n",
|
|
" commit_comment_author_login string \n",
|
|
" commit_comment_author_type string \n",
|
|
" commit_comment_author_association string \n",
|
|
" commit_comment_body string \n",
|
|
" commit_comment_path string \n",
|
|
" commit_comment_position string \n",
|
|
" commit_comment_line string \n",
|
|
" commit_comment_created_at datetime \n",
|
|
" commit_comment_updated_at datetime \n",
|
|
" pt string \n",
|
|
"}\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"import os\n",
"\n",
"from odps import ODPS\n",
"from odps import options\n",
"from odps.df import DataFrame\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Lift the pandas row limit so displayed objects are never truncated.\n",
"pd.set_option('display.max_rows', None)\n",
"\n",
"# Credentials come from the environment so that no secret is stored in the\n",
"# notebook; a missing variable raises KeyError before any connection is made.\n",
"# NOTE(review): the key pair that used to be hard-coded here has been exposed\n",
"# and should be rotated.\n",
"ACCESS_ID = os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID']\n",
"SECRET_ACCESS_KEY = os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET']\n",
"ODPS_PROJECT = 'OpenDigger_prod_dev'\n",
"ODPS_ENDPOINT = 'http://service.cn-shanghai.maxcompute.aliyun.com/api'\n",
"\n",
"# Entry object for the project; the later cells run their SQL through `o`.\n",
"o = ODPS(ACCESS_ID, SECRET_ACCESS_KEY,\n",
"         project=ODPS_PROJECT, endpoint=ODPS_ENDPOINT)\n",
"options.tunnel.limit_instance_tunnel = False\n",
"# options.read_timeout = 10000000\n",
"\n",
"# Print the column names and types of the two source tables.\n",
"users = DataFrame(o.get_table('ods_github_users'))\n",
"print(users.dtypes)\n",
"\n",
"github_log = DataFrame(o.get_table('ods_github_log'))\n",
"print(github_log.dtypes)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Count rows per (type, repo_id) among the rows with pt='20151001',\n",
"# restricted to the four event types listed in the query.\n",
"# The count column gets an explicit alias so the reader below does not rely\n",
"# on an engine-generated column name.\n",
"sql = '''\n",
"    select type, count(repo_id) as event_count, repo_id from ods_github_log\n",
"    where pt='20151001'\n",
"    and type in ('PullRequestEvent','WatchEvent','ForkEvent','IssueCommentEvent')\n",
"    group by type, repo_id;\n",
"'''\n",
"\n",
"result = o.execute_sql(sql, hints={'odps.sql.allow.fullscan': 'true', 'odps.sql.submit.mode': 'script'})\n",
"\n",
"# A forward slash keeps the path portable; the previous backslash-separated\n",
"# path only pointed into the data directory on Windows.\n",
"# The data directory itself must already exist.\n",
"with open('data/count.txt', 'w') as f:\n",
"    with result.open_reader() as reader:\n",
"        for record in reader:\n",
"            # `event_type` avoids shadowing the built-in `type`.\n",
"            event_type = record['type']\n",
"            event_count = record['event_count']\n",
"            repo_id = record['repo_id']\n",
"            # One line per (type, repo_id) group.\n",
"            f.write('type: {type}, repo_id: {repo_id}, count: {count}\\n'.format(\n",
"                type=event_type,\n",
"                repo_id=repo_id,\n",
"                count=event_count))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Event types to export, in the order their files are written.\n",
"EVENT_TYPES = ('PullRequestEvent', 'WatchEvent', 'ForkEvent', 'IssueCommentEvent')\n",
"\n",
"# graph_dict[event type][actor_id] -> list of repo_id values as strings.\n",
"graph_dict = {event_type: {} for event_type in EVENT_TYPES}\n",
"\n",
"# The group by returns each distinct (type, repo_id, actor_id) combination\n",
"# once, so a repo_id appears at most once per actor and event type.\n",
"sql = '''\n",
"    select type, repo_id, actor_id\n",
"    from ods_github_log\n",
"    where pt='20151001'\n",
"    and type in ('PullRequestEvent','WatchEvent','ForkEvent','IssueCommentEvent')\n",
"    group by type, repo_id, actor_id;\n",
"'''\n",
"result = o.execute_sql(sql, hints={'odps.sql.allow.fullscan': 'true', 'odps.sql.submit.mode': 'script'})\n",
"with result.open_reader() as reader:\n",
"    for record in reader:\n",
"        # `event_type` avoids shadowing the built-in `type`.\n",
"        event_type = record['type']\n",
"        actor_id = record['actor_id']\n",
"        repo_id = record['repo_id']\n",
"        # setdefault creates the actor's list on first sight.\n",
"        graph_dict[event_type].setdefault(actor_id, []).append(str(repo_id))\n",
"\n",
"# Write one file per event type: data/<event type>.txt.\n",
"# Forward slashes keep the paths portable; the previous backslash-separated\n",
"# paths only pointed into the data directory on Windows.\n",
"# The data directory itself must already exist.\n",
"for event_type in EVENT_TYPES:\n",
"    with open('data/{name}.txt'.format(name=event_type), 'w') as f:\n",
"        for actor_id, repo_ids in graph_dict[event_type].items():\n",
"            # Skip actors associated with fewer than two repositories.\n",
"            if len(repo_ids) < 2:\n",
"                continue\n",
"            f.write('actor_id: {actor_id}\\nrepo_id: {list}\\n'.format(\n",
"                actor_id=actor_id, list=' '.join(repo_ids)))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "caac794e4b8e34bcc9a4d9e1a06492e263031294735d822cbf2db7854bb6c6da"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3.10.4 64-bit (windows store)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.4"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|