Commit 543a3d92ad — bnc1010, 2 years ago (branch: master)

Commit message: to gitea shuishan

110 changed files with 32539 additions and 0 deletions
  1. BIN
      image/image-20230328110921250.png
  2. BIN
      image/image-20230328111052680.png
  3. BIN
      image/image-20230328134641189.png
  4. BIN
      image/image-20230328134956286.png
  5. BIN
      image/image-20230328135130745.png
  6. BIN
      image/image-20230328135236524.png
  7. +182
    -0
      oj机试手册.md
  8. +160
    -0
      域名&证书手册.md
  9. +28
    -0
      天梯s3令牌更新.md
  10. BIN
      天梯手册/codalab-notebook使用说明.docx
  11. BIN
      天梯手册/助教手册/figure/1.png
  12. BIN
      天梯手册/助教手册/figure/10.png
  13. BIN
      天梯手册/助教手册/figure/11.png
  14. BIN
      天梯手册/助教手册/figure/12.png
  15. BIN
      天梯手册/助教手册/figure/13.png
  16. BIN
      天梯手册/助教手册/figure/14.png
  17. BIN
      天梯手册/助教手册/figure/15.png
  18. BIN
      天梯手册/助教手册/figure/16.png
  19. BIN
      天梯手册/助教手册/figure/17.png
  20. BIN
      天梯手册/助教手册/figure/18.png
  21. BIN
      天梯手册/助教手册/figure/19.png
  22. BIN
      天梯手册/助教手册/figure/2.png
  23. BIN
      天梯手册/助教手册/figure/20.png
  24. BIN
      天梯手册/助教手册/figure/21.png
  25. BIN
      天梯手册/助教手册/figure/22.png
  26. BIN
      天梯手册/助教手册/figure/23.png
  27. BIN
      天梯手册/助教手册/figure/24.png
  28. BIN
      天梯手册/助教手册/figure/25.png
  29. BIN
      天梯手册/助教手册/figure/26.png
  30. BIN
      天梯手册/助教手册/figure/27.png
  31. BIN
      天梯手册/助教手册/figure/28.png
  32. BIN
      天梯手册/助教手册/figure/29.png
  33. BIN
      天梯手册/助教手册/figure/3.png
  34. BIN
      天梯手册/助教手册/figure/30.png
  35. BIN
      天梯手册/助教手册/figure/31.png
  36. BIN
      天梯手册/助教手册/figure/32.png
  37. BIN
      天梯手册/助教手册/figure/33.png
  38. BIN
      天梯手册/助教手册/figure/34.png
  39. BIN
      天梯手册/助教手册/figure/35.png
  40. BIN
      天梯手册/助教手册/figure/36.png
  41. BIN
      天梯手册/助教手册/figure/37.png
  42. BIN
      天梯手册/助教手册/figure/38.png
  43. BIN
      天梯手册/助教手册/figure/39.png
  44. BIN
      天梯手册/助教手册/figure/4.png
  45. BIN
      天梯手册/助教手册/figure/40.png
  46. BIN
      天梯手册/助教手册/figure/41.png
  47. BIN
      天梯手册/助教手册/figure/42.png
  48. BIN
      天梯手册/助教手册/figure/43.png
  49. BIN
      天梯手册/助教手册/figure/44.png
  50. BIN
      天梯手册/助教手册/figure/45.png
  51. BIN
      天梯手册/助教手册/figure/46.png
  52. BIN
      天梯手册/助教手册/figure/47.png
  53. BIN
      天梯手册/助教手册/figure/48.png
  54. BIN
      天梯手册/助教手册/figure/49.png
  55. BIN
      天梯手册/助教手册/figure/5.png
  56. BIN
      天梯手册/助教手册/figure/50.png
  57. BIN
      天梯手册/助教手册/figure/51.png
  58. BIN
      天梯手册/助教手册/figure/6.png
  59. BIN
      天梯手册/助教手册/figure/7.png
  60. BIN
      天梯手册/助教手册/figure/8.png
  61. BIN
      天梯手册/助教手册/figure/9.png
  62. +150
    -0
      天梯手册/助教手册/助教手册.md
  63. BIN
      天梯手册/学生手册/学生使用手册.docx
  64. BIN
      天梯手册/学生手册/学生使用手册.pdf
  65. +189
    -0
      天梯机试手册.md
  66. +2668
    -0
      工具&文件/ASTNN-clone/.ipynb_checkpoints/未命名-checkpoint.ipynb
  67. BIN
      工具&文件/ASTNN-clone/__pycache__/code.cpython-37.pyc
  68. BIN
      工具&文件/ASTNN-clone/__pycache__/model.cpython-37.pyc
  69. BIN
      工具&文件/ASTNN-clone/__pycache__/prepare_data.cpython-37.pyc
  70. BIN
      工具&文件/ASTNN-clone/__pycache__/tree.cpython-37.pyc
  71. +39
    -0
      工具&文件/ASTNN-clone/code.py
  72. BIN
      工具&文件/ASTNN-clone/data/c/ast.pkl
  73. BIN
      工具&文件/ASTNN-clone/data/c/dev/blocks.pkl
  74. BIN
      工具&文件/ASTNN-clone/data/c/dev/dev_.pkl
  75. +13256
    -0
      工具&文件/ASTNN-clone/data/c/id_pair.csv
  76. BIN
      工具&文件/ASTNN-clone/data/c/id_pair.pkl
  77. BIN
      工具&文件/ASTNN-clone/data/c/newproblems
  78. +11300
    -0
      工具&文件/ASTNN-clone/data/c/newproblems.csv
  79. BIN
      工具&文件/ASTNN-clone/data/c/newproblems.pkl
  80. BIN
      工具&文件/ASTNN-clone/data/c/node_w2v_128
  81. BIN
      工具&文件/ASTNN-clone/data/c/oj_clone_ids.pkl
  82. BIN
      工具&文件/ASTNN-clone/data/c/programs.pkl
  83. BIN
      工具&文件/ASTNN-clone/data/c/test/blocks.pkl
  84. BIN
      工具&文件/ASTNN-clone/data/c/test/test_.pkl
  85. BIN
      工具&文件/ASTNN-clone/data/c/train/blocks.pkl
  86. BIN
      工具&文件/ASTNN-clone/data/c/train/train_.pkl
  87. +199
    -0
      工具&文件/ASTNN-clone/model.py
  88. BIN
      工具&文件/ASTNN-clone/model/model_clone_c.pkl
  89. +193
    -0
      工具&文件/ASTNN-clone/pipeline.py
  90. +45
    -0
      工具&文件/ASTNN-clone/prepare_data.py
  91. +90
    -0
      工具&文件/ASTNN-clone/test.py
  92. +246
    -0
      工具&文件/ASTNN-clone/train.py
  93. +170
    -0
      工具&文件/ASTNN-clone/tree.py
  94. +80
    -0
      工具&文件/ASTNN-clone/utils.py
  95. +2668
    -0
      工具&文件/ASTNN-clone/work.ipynb
  96. +7
    -0
      工具&文件/ASTNN-clone/说明.md
  97. +27
    -0
      工具&文件/certs/9085819__shuishan.net.cn.key
  98. +76
    -0
      工具&文件/certs/9085819__shuishan.net.cn.pem
  99. +43
    -0
      工具&文件/jupyter-image-mladder/base/Dockerfile
  100. +7
    -0
      工具&文件/jupyter-image-mladder/base/enterpoint.sh

BIN  image/image-20230328110921250.png  (828×775, 44 KiB)
BIN  image/image-20230328111052680.png  (828×775, 52 KiB)
BIN  image/image-20230328134641189.png  (2560×1360, 241 KiB)
BIN  image/image-20230328134956286.png  (2560×1360, 334 KiB)
BIN  image/image-20230328135130745.png  (2337×187, 34 KiB)
BIN  image/image-20230328135236524.png  (2560×1360, 316 KiB)

oj机试手册.md  (+182 −0)

@@ -0,0 +1,182 @@
## 1 Platform deployment
#### 1.1 Deployment procedure
1. Copy the /data directory from the old server to /data on the new server: on the machine holding the data, switch to the relevant directory and run `scp ./data <new-user>@<new-ip>:/`
2. Copy the docker-compose.yml file to the new server.
3. Switch to the directory containing the yml file and run `sudo docker-compose up -d`.
#### 1.2 docker-compose.yml
```
version: "3"
services:
  oj-redis:
    image: redis:4.0-alpine
    container_name: oj-redis
    restart: always
    volumes:
      - /data/data/redis:/data
  oj-postgres:
    image: postgres:10-alpine
    container_name: oj-postgres
    restart: always
    command: postgres -c max_connections=1000
    ports:
      - "127.0.0.1:12348:5432"
    volumes:
      - /data/data/postgres:/var/lib/postgresql/data
    environment:
      - POSTGRES_DB=onlinejudge
      - POSTGRES_USER=onlinejudge
      - POSTGRES_PASSWORD=onlinejudge
  judge-server:
    image: registry.cn-hangzhou.aliyuncs.com/wsl/judge_server
    container_name: judge-server
    restart: always
    read_only: true
    cap_drop:
      - SETPCAP
      - MKNOD
      - NET_BIND_SERVICE
      - SYS_CHROOT
      - SETFCAP
      - FSETID
    tmpfs:
      - /tmp
    volumes:
      - /data/backend/test_case:/test_case:ro
      - /data/judge_server/log:/log
      - /data/judge_server/run:/judger
    environment:
      - SERVICE_URL=http://judge-server:8080
      - BACKEND_URL=http://oj-backend:8000/api/judge_server_heartbeat/
      - TOKEN=DASETALENT
  oj-backend:
    image: registry.cn-hangzhou.aliyuncs.com/wsl/oj_backend
    container_name: oj-backend
    restart: always
    depends_on:
      - oj-redis
      - oj-postgres
      - judge-server
    volumes:
      - /data/data/backend:/data
      - /data/data/app:/app
    environment:
      - POSTGRES_DB=onlinejudge
      - POSTGRES_USER=onlinejudge
      - POSTGRES_PASSWORD=onlinejudge
      - JUDGE_SERVER_TOKEN=DASETALENT
      # - FORCE_HTTPS=1
      # - STATIC_CDN_HOST=cdn.oj.com
    ports:
      - "0.0.0.0:80:8000"
      - "0.0.0.0:443:1443"
```
## 2 Requirements for creating problems
#### 2.1 Problem statement
The statement must be precise and unambiguous.
The input data ranges must be stated explicitly.
#### 2.2 Test cases
The samples shown in the problem statement must not appear among the test cases.
Among the test cases, small cases should make up no more than 20%, medium-sized cases 40%–60%, and the remainder should be cases close to the maximum data range (a sketch of such a split follows).
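As an illustration of that split, here is a minimal sketch of how a problem setter might bucket generated test cases by input size; the generator `make_case`, the size bounds, and `N_MAX` are hypothetical placeholders for a concrete problem, not part of the platform:
```
import random

# Hypothetical size buckets for a problem with n up to 1e5.
N_MAX = 100_000

def make_case(n):
    """Placeholder generator: one test case with n random values."""
    return [random.randint(1, 10**9) for _ in range(n)]

def build_test_set(total=20):
    small = max(1, int(total * 0.2))   # no more than 20% small cases
    medium = int(total * 0.5)          # 40%-60% medium cases
    large = total - small - medium     # the rest close to the maximum range
    cases = []
    cases += [make_case(random.randint(1, 100)) for _ in range(small)]
    cases += [make_case(random.randint(101, 10_000)) for _ in range(medium)]
    cases += [make_case(random.randint(int(N_MAX * 0.9), N_MAX)) for _ in range(large)]
    return cases

if __name__ == "__main__":
    print([len(c) for c in build_test_set()])
```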
#### 2.3 Reference solution
The problem setter must prepare a reference program whose correctness is assured.
## 3 Problem-verification requirements
#### 3.1 Different languages
Check that the same algorithm gives the same verdict across languages.
Avoid situations where, for example, a C solution passes while the same algorithm implemented in Python times out.
#### 3.2 Verification procedure
At least two people must verify each problem.
1. The problem setter submits the reference solution to verify that the test cases are correct.
2. A non-setter follows the normal solving workflow: read the statement, check for ambiguity or unclear wording, then write and submit a solution.
Implement the reference solution in the other languages and test for the issue described in 3.1; adjust the problem's time limit where appropriate.
## 4 Exam workflow
Before the exam
1. Create the contest on the main OJ platform: set a password, disable the real-time rank, and keep the contest invisible. Create a new (backup) server.
2. Create the problems, following the requirements in section 2.
3. Verify the problems, following section 3. Verification requires making the contest visible: make sure the password is set first, and restore invisibility as soon as verification is done.
4. Back up the main platform to the new server, start the services, and verify availability (see section 1, platform deployment).
Shortly before the exam
5. Confirm that the password is set, the real-time rank is off, and the contest is invisible.
Right at the start
6. Make the contest visible so that candidates can reach the password-entry page.
7. Announce the contest password.
During the exam
8. Monitor platform availability. If the service misbehaves, first log in to the server and run `sudo docker-compose restart` (a hot restart). If that does not recover the service, go to step 9; if it does, continue the exam and skip step 9.
9. Move the candidates to the backup server.
Immediately after the exam
10. Change the contest password and make the contest invisible again.
Grading
11. Follow 机试结束后续流程.pdf, with the following change to the database-connection part:
Use an SSH tunnel:
<img src="image/image-20230328110921250.png" alt="image-20230328110921250" style="zoom:50%;" />
Connect to the database:
<img src="image/image-20230328111052680.png" alt="image-20230328111052680" style="zoom:50%;" />
The database name, user, and password are all onlinejudge.
## After the exam, manually create a permanent snapshot.

域名&证书手册.md  (+160 −0)

@@ -0,0 +1,160 @@
The certificates are in the cert folder; the 9085819__XXX certificate expires at 07:59:59 on Saturday, January 13, 2024.
It must be replaced every year!!!
Ask the classmate who administers things on the Shuishan side for new certificates.
#### 1 Ladder (天梯) certificate deployment
/src — ladder project directory
/src/cert — certificate directory
/src/.env — ladder environment-variable file
Steps:
1. Put the valid certificate into /src/cert.
2. Edit /src/.env:
```
...
NGINX_PORT=80
SSL_PORT=443
## change the filenames in these two lines to the new valid certificate
SSL_CERTIFICATE_N=/app/certs/9085819__shuishan.net.cn.pem
SSL_CERTIFICATE_KEY_N=/app/certs/9085819__shuishan.net.cn.key
##
...
```
3. Restart the ladder project: `sudo docker-compose stop && sudo docker-compose start`
4. Visit mladder.shuishan.net.cn and check that it is reachable.
#### 2 Proving-ground (校场) certificate deployment
/jcdata — proving-ground data folder (adjust to the actual layout)
/jcdata/backend/ssl — certificate directory
/jcdata/backend_app/deploy/nginx/nginx.conf — nginx configuration file
Steps:
1. Put the valid certificate into /jcdata/backend/ssl.
2. Edit /jcdata/backend_app/deploy/nginx/nginx.conf:
```
...
server {
    listen 1443 ssl http2 default_server;
    server_name _;
    # Change the two lines below. /data/ssl is the path inside the container
    # (/jcdata/backend/ssl is mounted as /data/ssl), so only the trailing
    # filenames need to be updated.
    ssl_certificate /data/ssl/9085819__shuishan.net.cn.pem;
    ssl_certificate_key /data/ssl/9085819__shuishan.net.cn.key;
    ssl_protocols TLSv1.2;
    ssl_ciphers ...
}
}
```
Proving-ground docker-compose.yml:
```
version: "3"
services:
  oj-redis:
    image: redis:4.0-alpine
    container_name: oj-redis
    restart: always
    volumes:
      - /jcdata/redis:/data
  oj-postgres:
    image: postgres:10-alpine
    container_name: oj-postgres
    restart: always
    volumes:
      - /data/jcdb:/var/lib/postgresql/data
    environment:
      - POSTGRES_DB=onlinejudge
      - POSTGRES_USER=onlinejudge
      - POSTGRES_PASSWORD=onlinejudge
  judge-server:
    image: dasetalent/judgeserver:v2.1
    container_name: judge-server
    restart: always
    read_only: true
    cap_drop:
      - SETPCAP
      - MKNOD
      - NET_BIND_SERVICE
      - SYS_CHROOT
      - SETFCAP
      - FSETID
    tmpfs:
      - /tmp
    volumes:
      - /jcdata/backend/test_case:/test_case:ro
      - /jcdata/judge_server/log:/log
      - /jcdata/judge_server/run:/judger
    environment:
      - SERVICE_URL=http://judge-server:8080
      - BACKEND_URL=http://oj-backend:8000/api/judge_server_heartbeat/
      - TOKEN=CHANGE_THIS
  oj-backend:
    image: registry.cn-shanghai.aliyuncs.com/shuishan-data/shuishan-oj-backend:aliyun
    container_name: oj-backend
    restart: always
    depends_on:
      - oj-redis
      - oj-postgres
      - judge-server
    volumes:
      - /jcdata/backend_app:/app
      - /jcdata/backend:/data
    environment:
      - POSTGRES_DB=onlinejudge
      - POSTGRES_USER=onlinejudge
      - POSTGRES_PASSWORD=onlinejudge
      - JUDGE_SERVER_TOKEN=CHANGE_THIS
      # - FORCE_HTTPS=1
      # - STATIC_CDN_HOST=cdn.oj.com
    ports:
      - "0.0.0.0:80:8000"
      - "0.0.0.0:443:1443"
```
3. Restart the proving ground: `sudo docker-compose stop && sudo docker-compose start`
4. Visit judgefield.shuishan.net.cn and check that it is reachable.

天梯s3令牌更新.md  (+28 −0)

@@ -0,0 +1,28 @@
## The S3 token is valid for one year and must be renewed annually
Go to https://edu.ucloud.cn/
The account is listed in dasetalent_host.md
1. Set the project to 实验室-陆雪松
![image-20230328134641189](image/image-20230328134641189.png)
Open 对象存储&CDN (object storage & CDN)
![image-20230328134956286](image/image-20230328134956286.png)
Click 令牌管理 (token management) in the tab bar
![image-20230328135130745](image/image-20230328135130745.png)
Click the 查看/编辑 (view/edit) button
![image-20230328135236524](image/image-20230328135236524.png)
Click 重新设置 (reset), set the validity to one year, and confirm

BIN  天梯手册/codalab-notebook使用说明.docx
BIN  天梯手册/助教手册/figure/1.png  (1098×449, 16 KiB)
BIN  天梯手册/助教手册/figure/10.png  (1217×312, 54 KiB)
BIN  天梯手册/助教手册/figure/11.png  (1094×100, 32 KiB)
BIN  天梯手册/助教手册/figure/12.png  (1169×180, 42 KiB)
BIN  天梯手册/助教手册/figure/13.png  (1175×181, 50 KiB)
BIN  天梯手册/助教手册/figure/14.png  (1097×342, 65 KiB)
BIN  天梯手册/助教手册/figure/15.png  (1501×55, 14 KiB)
BIN  天梯手册/助教手册/figure/16.png  (1328×153, 42 KiB)
BIN  天梯手册/助教手册/figure/17.png  (1137×73, 20 KiB)
BIN  天梯手册/助教手册/figure/18.png  (1710×214, 58 KiB)
BIN  天梯手册/助教手册/figure/19.png  (1002×343, 18 KiB)
BIN  天梯手册/助教手册/figure/2.png  (966×797, 34 KiB)
BIN  天梯手册/助教手册/figure/20.png  (850×369, 19 KiB)
BIN  天梯手册/助教手册/figure/21.png  (748×201, 6.9 KiB)
BIN  天梯手册/助教手册/figure/22.png  (1091×184, 18 KiB)
BIN  天梯手册/助教手册/figure/23.png  (958×468, 99 KiB)
BIN  天梯手册/助教手册/figure/24.png  (958×468, 99 KiB)
BIN  天梯手册/助教手册/figure/25.png  (969×193, 19 KiB)
BIN  天梯手册/助教手册/figure/26.png  (1224×504, 27 KiB)
BIN  天梯手册/助教手册/figure/27.png  (985×561, 63 KiB)
BIN  天梯手册/助教手册/figure/28.png  (908×404, 28 KiB)
BIN  天梯手册/助教手册/figure/29.png  (821×341, 31 KiB)
BIN  天梯手册/助教手册/figure/3.png  (971×267, 11 KiB)
BIN  天梯手册/助教手册/figure/30.png  (804×380, 44 KiB)
BIN  天梯手册/助教手册/figure/31.png  (804×380, 44 KiB)
BIN  天梯手册/助教手册/figure/32.png  (630×145, 12 KiB)
BIN  天梯手册/助教手册/figure/33.png  (510×109, 5.1 KiB)
BIN  天梯手册/助教手册/figure/34.png  (977×65, 8.7 KiB)
BIN  天梯手册/助教手册/figure/35.png  (2196×1136, 522 KiB)
BIN  天梯手册/助教手册/figure/36.png  (2196×1136, 111 KiB)
BIN  天梯手册/助教手册/figure/37.png  (2173×1196, 309 KiB)
BIN  天梯手册/助教手册/figure/38.png  (981×642, 115 KiB)
BIN  天梯手册/助教手册/figure/39.png  (749×421, 23 KiB)
BIN  天梯手册/助教手册/figure/4.png  (895×520, 63 KiB)
BIN  天梯手册/助教手册/figure/40.png  (1694×1109, 170 KiB)
BIN  天梯手册/助教手册/figure/41.png  (1694×1109, 165 KiB)
BIN  天梯手册/助教手册/figure/42.png  (2002×1127, 494 KiB)
BIN  天梯手册/助教手册/figure/43.png  (2560×1360, 355 KiB)
BIN  天梯手册/助教手册/figure/44.png  (2560×1360, 362 KiB)
BIN  天梯手册/助教手册/figure/45.png  (2196×1150, 522 KiB)
BIN  天梯手册/助教手册/figure/46.png  (1694×1109, 169 KiB)
BIN  天梯手册/助教手册/figure/47.png  (1694×1109, 131 KiB)
BIN  天梯手册/助教手册/figure/48.png  (1207×520, 69 KiB)
BIN  天梯手册/助教手册/figure/49.png  (1269×396, 43 KiB)
BIN  天梯手册/助教手册/figure/5.png  (912×436, 84 KiB)
BIN  天梯手册/助教手册/figure/50.png  (1400×618, 74 KiB)
BIN  天梯手册/助教手册/figure/51.png  (1212×367, 19 KiB)
BIN  天梯手册/助教手册/figure/6.png  (1552×367, 49 KiB)
BIN  天梯手册/助教手册/figure/7.png  (1552×367, 49 KiB)
BIN  天梯手册/助教手册/figure/8.png  (809×836, 62 KiB)
BIN  天梯手册/助教手册/figure/9.png  (862×534, 101 KiB)

天梯手册/助教手册/助教手册.md  (+150 −0)

@@ -0,0 +1,150 @@
# 水杉天梯 — TA manual
This manual walks through how a TA creates an assignment, uploads the evaluation files the assignment needs, and uploads its dataset, as well as how to update an uploaded dataset and edit an existing assignment. For multi-phase file submissions it gives a naming convention and specifies the packaging function in the template file.
## 1 Creating an assignment
### Assignment description
This part explains what each field of the form means and how to fill it in.
![avatar](./figure/1.png)
**Assignment name — Chinese**: the assignment's display name.
**Assignment name — English**: the name of the **zip archive** generated when the form is submitted.
**Icon**: if none is uploaded, the default logo is used as the assignment logo. (Common image formats only.)
### Web Page
![avatar](./figure/2.png)
![avatar](./figure/3.png)
**Overview, data description, evaluation, and terms** use a rich-text editor: text in these boxes can be bolded, italicized, resized, colored, and so on.
![avatar](./figure/4.png)
The contents of these boxes are rendered as **HTML** on the generated assignment page. The mapping is:
**概述 (overview)** — **Overview**; **评估 (evaluation)** — **Evaluation**; **限制条件 (terms)** — **Terms and Conditions**; **数据描述 (data description)** — **Get Data**. They are circled in red in the figures below.
![avatar](./figure/5.png)
![avatar](./figure/6.png)
### Multi-phase
A multi-phase mechanism lets one assignment be submitted in several phases.
**(Interpretation: phases are parts of the same assignment — for example, solving the same classification task once with an SVM and once with a decision tree gives two phases and two leaderboards.)**
![avatar](./figure/7.png)
**Max submissions per day**: the maximum number of submissions allowed per day.
**Max submissions**: the total number of submissions allowed from the start of the assignment to its end.
**Number of phases**: how many phases the assignment has. After filling it in, click confirm and the corresponding number of phase forms is generated.
![avatar](./figure/8.png)
After confirming, one content box appears per phase. Each phase needs:
**Evaluation script**: a .py file written by the TA; a reference example is available via the hyperlink.
**Reference prediction file**: the ground-truth file the TA uploads for scoring.
**Phase start time**: when students may begin submitting files for that phase.
![avatar](./figure/9.png)
The generated assignment is split into phases; clicking a phase lets students submit the files that phase requires.
**Additional notes on the example evaluation script**:
**File locating**: if the script is run by this system, this part of the code needs no change; for local testing, simply adjust the paths.
![avatar](./figure/10.png)
**File reading**: the script reads the student result (prediction.txt) and the reference result (true.txt). The student result must use a fixed filename (this can be baked into the Jupyter template file); the reference filename must match the file uploaded when the assignment was created.
![avatar](./figure/11.png)
![avatar](./figure/12.png)
![avatar](./figure/13.png)
The two files must share the same data format; adapt the custom read_txt function to read other formats.
![avatar](./figure/14.png)
**Scoring**: write a custom calculate_metric function for the desired metric (extend it to produce several scores if multiple metrics are needed). Below is an accuracy example.
![avatar](./figure/15.png)
![avatar](./figure/16.png)
**Output**: results must be written to a file named scores.txt — this name cannot be changed. Each line of the output corresponds to one metric's score; the metric names and decimal precision must match what was entered in the leaderboard section of the assignment form.
![avatar](./figure/17.png)
![avatar](./figure/18.png)
**Please run Evaluate.py locally and check its output before uploading!**
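The platform's own template is what the figures above show; as a rough stand-in for local testing, here is a minimal sketch of such a script under the conventions just described. The prediction.txt/true.txt/scores.txt names come from this manual, while the `input`/`output` directory layout and argv convention are assumptions borrowed from stock CodaLab scoring programs, not confirmed here:
```
import os
import sys

def read_txt(path):
    """Read one label per line; adapt this to the assignment's data format."""
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

def calculate_metric(pred, true):
    """Accuracy, as in the manual's example; add more metrics as needed."""
    correct = sum(p == t for p, t in zip(pred, true))
    return correct / len(true)

if __name__ == "__main__":
    # Assumed CodaLab-style layout: argv[1] holds inputs, argv[2] the output dir.
    input_dir, output_dir = sys.argv[1], sys.argv[2]
    pred = read_txt(os.path.join(input_dir, "res", "prediction.txt"))  # student result
    true = read_txt(os.path.join(input_dir, "ref", "true.txt"))        # reference result
    acc = calculate_metric(pred, true)
    # One line per metric; label and precision must match the leaderboard form.
    with open(os.path.join(output_dir, "scores.txt"), "w") as f:
        f.write("ACC: %.3f\n" % acc)
```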
### Leaderboard
![avatar](./figure/19.png)
**Number of metrics**: how many evaluation metrics the assignment reports; some assignments need more than one. Filling this in generates the metric boxes shown below.
**Metric label**: the metric's name, e.g. ACC, Precision, Recall.
**Number format**: how many decimal places to keep.
**Sorting**: whether the metric is sorted ascending or descending. Only **asc or desc** is accepted.
### Other
![avatar](./figure/20.png)
**Assignment access**: whether joining the assignment requires TA approval. If yes, participation requests must be approved in the TA backend; if no, requests are accepted automatically.
Start time, end time: when the assignment opens and closes.
![avatar](./figure/21.png)
Click "generate zip file" (the blue hyperlink in the figure above) to produce a zip archive. (**Every field except the icon is required.**)
After downloading the archive, click "upload zip file" to create the new assignment. (**The archive's filename must not contain Chinese characters, spaces, etc.**)
![avatar](./figure/22.png)
After a successful upload, click the blue "view" hyperlink to see the assignment details.
## 2 Editing an assignment
To edit, open the assignment details page and click options — edit.
![avatar](./figure/23.png)
Note: the edit page contains many forms; only the parts a TA is likely to change are covered here. For deeper edits, consult
https://github.com/codalab/codalab-competitions/wiki/Organizer_Codalab-competition-YAML-definition-language
and modify the fields accordingly.
### Assignment description
![avatar](./figure/25.png)
See the assignment-description part of section 1.
**Title** corresponds to the **assignment's Chinese name**.
**Description** corresponds to the **description field**.
### Web Page
![avatar](./figure/26.png)
Edit the Web Pages; the field mapping is the same as in section 1.
### Multi-phase
![avatar](./figure/27.png)
**Start Date (UTC)**: change each phase's start time.
**Maximum Submissions (per User)**: change the total number of submissions allowed.
**Max Submissions (per User) per day**: change the per-day submission limit.
### Leaderboard
![avatar](./figure/28.png)
![avatar](./figure/29.png)
**Key** and **Label** must be **identical**; they correspond to the **metric label**.
**Numeric format** corresponds to the **number format**.
**Sorting** corresponds to the **sorting setting**.
### Other
![avatar](./figure/30.png)
**Organizers need to approve the new teams** corresponds to the assignment-access setting in section 1.
**Anonymous leaderboard** — whether leaderboard usernames are anonymized.
![avatar](./figure/32.png)
**Disallow leaderboard modifying** — whether submissions can be modified.
**Force submission to leaderboard** — whether students must push results to the leaderboard manually.
![avatar](./figure/33.png)
**Registration Required** — whether students need TA approval to join the assignment.
![avatar](./figure/34.png)
If a submission scores better than previous ones, it is pushed to the leaderboard automatically.
## 3 Uploading a dataset
Click the upload-dataset button to open the panel.
![avatar](./figure/35.png)
![avatar](./figure/36.png)
Click "choose file", select the dataset archive, then click submit.
Upload progress is shown below the submit button; a dialog confirms completion.
![avatar](./figure/37.png)
Dataset format:
The archive must contain an input folder holding the dataset, and may include a template notebook file.
Reference layout:
![avatar](./figure/38.png)
Note: when zipping, select all files and folders at this level and compress them — do not compress the parent directory.
After uploading, the student's view of the directory looks like this:
![avatar](./figure/39.png)
The read_only_sample file is read-only and serves as a backup.
For multi-phase submissions an extra output folder is required, structured as below:
![avatar](./figure/40.png)
![avatar](./figure/41.png)
Add one phase_x folder per phase; phase_x stores the student's program output for phase x.
## 4 Resetting the dataset
Clicking the "reset Jupyter" button deletes the assignment's notebook; when students next open a notebook it is rebuilt from the most recently uploaded dataset template.
![avatar](./figure/42.png)
When to use it: if a faulty dataset needs replacing after students have already opened notebooks, re-uploading alone will not update those open notebooks — click this button to fix that.
![avatar](./figure/43.png)
![avatar](./figure/44.png)
## 5 Downloading student code
![avatar](./figure/45.png)
Click the student-code button to download.
![avatar](./figure/46.png)
All students' code can be downloaded; student files are named by student ID.
![avatar](./figure/47.png)
Each student's notebook or .py files are under the corresponding directory.
## 6 The template notebook file
The template contains three parts:
1. The program logic for the specific problem
![avatar](./figure/48.png)
2. The output code: output_string holds the program's output, phase_id is the phase ID, and each phase's output is stored separately
![avatar](./figure/49.png)
3. The packaging code
![avatar](./figure/50.png)
Only the first part normally changes; the last two are fixed code that make students emit and package results the way the system expects (a sketch follows below).
![avatar](./figure/51.png)
Clicking the submit button submits prediction_phase_1.zip to the phase-1 leaderboard.
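The manual does not reproduce the template code itself, so the following is only a minimal sketch of what parts 2 and 3 might look like under the conventions named above (output_string, phase_id, an output/phase_x folder, and a prediction_phase_x.zip archive); the authoritative version is the one shown in the figures:
```
import os
import zipfile

# --- Part 2: output code (assumed shape) ---
phase_id = 1                  # which phase this result belongs to
output_string = "..."         # filled in by the problem-specific logic above

out_dir = os.path.join("output", "phase_%d" % phase_id)
os.makedirs(out_dir, exist_ok=True)
pred_path = os.path.join(out_dir, "prediction.txt")
with open(pred_path, "w") as f:  # fixed filename expected by the evaluation script
    f.write(output_string)

# --- Part 3: packaging code (assumed shape) ---
zip_name = "prediction_phase_%d.zip" % phase_id
with zipfile.ZipFile(zip_name, "w") as z:
    z.write(pred_path, arcname="prediction.txt")
```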

BIN  天梯手册/学生手册/学生使用手册.docx
BIN  天梯手册/学生手册/学生使用手册.pdf


天梯机试手册.md  (+189 −0)

@@ -0,0 +1,189 @@
## 1 Resource preparation
Based on the model sizes involved, set a sensible resource quota for each container and compute the total resources required.
For example, 100 students at 1 CPU core and 4 GB each need 100C/400G in total.
Add worker nodes to the K8S cluster to cover total demand × 115% (at least 115C/460G in the example above); the 15% is headroom, and more is fine.
Manually label the exam nodes with cal_type:cpu/gpu and ntype:exam.
Exam images are all scheduled onto these nodes, while ordinary teaching images are scheduled only onto ntype:study nodes. This separates exams from teaching and protects exam stability.
Likewise, GPU images are scheduled only onto cal_type:gpu nodes, and CPU images onto cal_type:cpu nodes.
## 2 Environment preparation
Build an image matching what the exam actually needs.
Build process:
The files are in the jupyter-image-mladder folder.
base is the base image; build it locally first:
    cd base
    sudo docker build -t mld:v1 .
In the other images, change the FROM line to the mld:v1 you just built.
To install packages, just follow the code below using conda or pip3; mirror sources are recommended because they are much faster.
Some packages such as cudnn cannot be installed with pip; use conda instead (search online for the details).
```
# FROM must name the image you built yourself
FROM mld:v1
ARG NB_USER="jupyter"
ARG NB_UID="1000"
ARG NB_GID="100"
ARG NB_PORT=8888
USER root
###### Package installation:
RUN conda install pytorch torchvision torchaudio cpuonly -c pytorch \
    && conda install tensorflow
RUN pip3 install numpy seaborn sklearn h5py matplotlib pandas future imageio -i https://pypi.mirrors.ustc.edu.cn/simple/
RUN pip3 install lightgbm xgboost imblearn mindspore -i https://pypi.mirrors.ustc.edu.cn/simple/
RUN pip3 install keras -i https://pypi.mirrors.ustc.edu.cn/simple/
######
ENV HOME="/home/${NB_USER}"
USER ${NB_UID}
ENTRYPOINT [ "/enterpoint.sh" ]
```
For a different environment only the content between the ###### markers needs to change; the rest normally stays as-is.
Once the image is built, push it to Docker Hub.
The dasetalent account password is in dassetalent_host.md.
After `docker push` to Docker Hub, log in to the exam nodes (either directly over ssh, or via the remote-connection workbench in the Aliyun k8s console) and manually run `docker pull dasetalent/xxxxxx`, where xxxxxx is the uploaded image including its version tag.
The following steps are temporary:
Log in to the ladder server.
Directory: /home/lwttest
Edit /home/lwttest/config.json:
```
{"version": "v1.2.1", "images": {
    "old": {"image": "bnc1010/old_notebook:v1", "use_gpu": false, "workdir": "/home/public/", "node_select": {"ntype": "study"}},
    "torch-gpu": {"image": "bnc1010/mladder_notebook_torchgpu:v0.3", "use_gpu": true, "workdir": "/home/jupyter/", "node_select": {"ntype": "study"}},
    "tensorflow-gpu": {"image": "bnc1010/mladder_notebook_tensorflowgpu:v0.2", "use_gpu": true, "workdir": "/home/jupyter/", "node_select": {"ntype": "study"}},
    "tensorflow-pytorch-cpu(exam)": {"image": "bnc1010/mladder_notebook_torch_tf_sk:v1.6", "use_gpu": false, "workdir": "/home/jupyter/", "node_select": {"ntype": "exam"}}
},
"node_ips": ["47.100.69.138", "139.224.216.129"],
"gpu_maxn": 0,
"gpu_notebook": {}}
```
Take tensorflow-pytorch-cpu(exam) as an example — an exam-only image with four parameters:
```
{
    "image": "bnc1010/mladder_notebook_torch_tf_sk:v1.6",
    "use_gpu": false,
    "workdir": "/home/jupyter/",
    "node_select": {"ntype": "exam"}
}
```
image: an image name that can be pulled directly from Docker Hub
use_gpu: whether the image needs a GPU
workdir: the notebook's working directory
node_select: node label selector
Here node_select contains ntype:exam, meaning containers started from this image run only on the exam-only nodes;
containers from the other images run only on nodes labeled ntype:study.
**After adding the new image to this config file, restart the script /home/lwttest/workServer.py.**
It currently runs inside screen: `screen -x notebook` to attach, Ctrl-C to stop it, then `python3 /home/lwttest/workServer.py` to restart it.
## 3 Contest preparation
Prepare the contest the exam needs — see the ladder TA manual for details.
**Set the exam's environment to the exam-only environment prepared above.**
Do not publish yet; test assignments can use a private link, and testers can come in directly through their Shuishan accounts.
## 4 During the exam
#### 4.1 Shortly before the start
Clear any containers already open on the k8s exam nodes.
Configure the container resource quotas:
```
resources:
  requests:
    memory: 2Gi
    cpu: 800m
  limits:
    memory: 4Gi
    cpu: 2000m
```
#### 4.2 Start
Publish the contest.
#### 4.3 Mid-exam
Monitor the k8s cluster and track resource usage in real time; if resources run short, add new nodes on the fly.
If other nodes are available, a simple and effective remedy is to manually edit the yml of any deployment that failed to start and change its ntype so that it is scheduled onto a non-exam node.
Exam-content issues:
If files are wrong, instructions are mistaken, or files need to be added, upload the fixes manually to the contest's input folder.
#### 4.4 After the exam
Remind candidates to save their notebooks, so that no code is lost when the containers are shut down.
In the ladder, click the code button to download the candidates' code and the leaderboard ranking CSV.
Once everything is saved, delete all deployments on the exam nodes.
Set the contest's environment to empty so candidates can no longer reopen notebooks from the contest page.
Create a snapshot of the data disk.
Remove the temporary worker nodes.

工具&文件/ASTNN-clone/.ipynb_checkpoints/未命名-checkpoint.ipynb  (+2668 −0)
File diff suppressed because it is too large

BIN  工具&文件/ASTNN-clone/__pycache__/code.cpython-37.pyc
BIN  工具&文件/ASTNN-clone/__pycache__/model.cpython-37.pyc
BIN  工具&文件/ASTNN-clone/__pycache__/prepare_data.cpython-37.pyc
BIN  工具&文件/ASTNN-clone/__pycache__/tree.cpython-37.pyc


工具&文件/ASTNN-clone/code.py  (+39 −0)

@@ -0,0 +1,39 @@
import pandas as pd

# Build clone-detection input files from an OJ submission dump: read a CSV of
# submissions, pair up submissions to the same problem, and write the id pairs
# and the re-indexed programs under data/c/. (The committed file had all of
# this commented out; it is restored here as runnable code.)

# An earlier variant also read the CSV header manually:
# title = []
# with open('../../submission_202110142140.csv', 'r') as f:
#     records = f.readlines()
#     title = records[0].replace("\"", "").split(',')

records = pd.read_csv('./submission_202110142140.csv')
pros = records["code"].to_list()
id = range(len(pros))
pm_id = records["problem_id"].to_list()

# Group submission indices by problem id.
dic = {}
for i in range(len(pros)):
    problem_id = pm_id[i]
    if not dic.get(problem_id):
        dic[problem_id] = []
    dic[problem_id].append(i)

# Every unordered pair of submissions to the same problem is a candidate clone pair.
pairs_a = []
pairs_b = []
for k, v in dic.items():
    for i in range(len(v)):
        for j in range(i + 1, len(v)):
            pairs_a.append(v[i])
            pairs_b.append(v[j])

pair_data = {'id1': pairs_a, 'id2': pairs_b}
newpair = pd.DataFrame(pair_data, columns=["id1", "id2"])
newpair.to_csv('./data/c/id_pair.csv')

Data = {'0': id, '1': pros}
newdata = pd.DataFrame(Data, columns=["0", "1"])
newdata.to_csv('./data/c/newproblems.csv')

BIN  工具&文件/ASTNN-clone/data/c/ast.pkl
BIN  工具&文件/ASTNN-clone/data/c/dev/blocks.pkl
BIN  工具&文件/ASTNN-clone/data/c/dev/dev_.pkl

工具&文件/ASTNN-clone/data/c/id_pair.csv  (+13256 −0)
File diff suppressed because it is too large

BIN  工具&文件/ASTNN-clone/data/c/id_pair.pkl
BIN  工具&文件/ASTNN-clone/data/c/newproblems

工具&文件/ASTNN-clone/data/c/newproblems.csv  (+11300 −0)
File diff suppressed because it is too large

BIN  工具&文件/ASTNN-clone/data/c/newproblems.pkl
BIN  工具&文件/ASTNN-clone/data/c/node_w2v_128
BIN  工具&文件/ASTNN-clone/data/c/oj_clone_ids.pkl
BIN  工具&文件/ASTNN-clone/data/c/programs.pkl
BIN  工具&文件/ASTNN-clone/data/c/test/blocks.pkl
BIN  工具&文件/ASTNN-clone/data/c/test/test_.pkl
BIN  工具&文件/ASTNN-clone/data/c/train/blocks.pkl
BIN  工具&文件/ASTNN-clone/data/c/train/train_.pkl


工具&文件/ASTNN-clone/model.py  (+199 −0)

@@ -0,0 +1,199 @@
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.autograd import Variable
import random


class BatchTreeEncoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, encode_dim, batch_size, use_gpu, pretrained_weight=None):
        super(BatchTreeEncoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding_dim = embedding_dim
        self.encode_dim = encode_dim
        self.W_c = nn.Linear(embedding_dim, encode_dim)
        self.activation = F.relu
        self.stop = -1
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.node_list = []
        self.th = torch.cuda if use_gpu else torch
        self.batch_node = None
        self.max_index = vocab_size
        # pretrained embedding
        if pretrained_weight is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_weight))
            # self.embedding.weight.requires_grad = False

    def create_tensor(self, tensor):
        if self.use_gpu:
            return tensor.cuda()
        return tensor

    def traverse_mul(self, node, batch_index):
        size = len(node)
        if not size:
            return None
        batch_current = self.create_tensor(Variable(torch.zeros(size, self.embedding_dim)))

        index, children_index = [], []
        current_node, children = [], []
        for i in range(size):
            # if node[i][0] != -1:
            index.append(i)
            current_node.append(node[i][0])
            temp = node[i][1:]
            c_num = len(temp)
            for j in range(c_num):
                if temp[j][0] != -1:  # was 'is not -1', which relies on int caching; use != instead
                    if len(children_index) <= j:
                        children_index.append([i])
                        children.append([temp[j]])
                    else:
                        children_index[j].append(i)
                        children[j].append(temp[j])
            # else:
            #     batch_index[i] = -1

        batch_current = self.W_c(batch_current.index_copy(0, Variable(self.th.LongTensor(index)),
                                                          self.embedding(Variable(self.th.LongTensor(current_node)))))

        for c in range(len(children)):
            zeros = self.create_tensor(Variable(torch.zeros(size, self.encode_dim)))
            batch_children_index = [batch_index[i] for i in children_index[c]]
            tree = self.traverse_mul(children[c], batch_children_index)
            if tree is not None:
                batch_current += zeros.index_copy(0, Variable(self.th.LongTensor(children_index[c])), tree)
        # batch_index = [i for i in batch_index if i != -1]
        b_in = Variable(self.th.LongTensor(batch_index))
        self.node_list.append(self.batch_node.index_copy(0, b_in, batch_current))
        return batch_current

    def forward(self, x, bs):
        self.batch_size = bs
        self.batch_node = self.create_tensor(Variable(torch.zeros(self.batch_size, self.encode_dim)))
        self.node_list = []
        self.traverse_mul(x, list(range(self.batch_size)))
        self.node_list = torch.stack(self.node_list)
        return torch.max(self.node_list, 0)[0]


class BatchProgramCC(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, encode_dim, label_size, batch_size, use_gpu=True,
                 pretrained_weight=None):
        super(BatchProgramCC, self).__init__()
        self.stop = [vocab_size - 1]
        self.hidden_dim = hidden_dim
        self.num_layers = 1
        self.gpu = use_gpu
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.encode_dim = encode_dim
        self.label_size = label_size
        self.encoder = BatchTreeEncoder(self.vocab_size, self.embedding_dim, self.encode_dim,
                                        self.batch_size, self.gpu, pretrained_weight)
        self.root2label = nn.Linear(self.encode_dim, self.label_size)
        # gru
        self.bigru = nn.GRU(self.encode_dim, self.hidden_dim, num_layers=self.num_layers, bidirectional=True,
                            batch_first=True)
        # linear
        self.hidden2label = nn.Linear(self.hidden_dim * 2, self.label_size)
        # hidden
        # self.hidden = self.init_hidden()
        self.dropout = nn.Dropout(0.2)

    def init_hidden(self):
        if self.gpu is True:
            if isinstance(self.bigru, nn.LSTM):
                h0 = Variable(torch.zeros(self.num_layers * 2, self.batch_size, self.hidden_dim).cuda())
                c0 = Variable(torch.zeros(self.num_layers * 2, self.batch_size, self.hidden_dim).cuda())
                return h0, c0
            return Variable(torch.zeros(self.num_layers * 2, self.batch_size, self.hidden_dim)).cuda()
        else:
            return Variable(torch.zeros(self.num_layers * 2, self.batch_size, self.hidden_dim))

    def get_zeros(self, num):
        zeros = Variable(torch.zeros(num, self.encode_dim))
        if self.gpu:
            return zeros.cuda()
        return zeros

    def encode(self, x):
        # print(x)
        lens = [len(item) for item in x]
        max_len = max(lens)

        encodes = []
        for i in range(self.batch_size):
            for j in range(lens[i]):
                encodes.append(x[i][j])
        encodes = self.encoder(encodes, sum(lens))
        seq, start, end = [], 0, 0
        for i in range(self.batch_size):
            end += lens[i]
            if max_len - lens[i]:
                seq.append(self.get_zeros(max_len - lens[i]))
            seq.append(encodes[start:end])
            start = end
        encodes = torch.cat(seq)
        encodes = encodes.view(self.batch_size, max_len, -1)

        # gru
        gru_out, hidden = self.bigru(encodes, self.hidden)
        gru_out = torch.transpose(gru_out, 1, 2)
        # pooling
        gru_out = F.max_pool1d(gru_out, gru_out.size(2)).squeeze(2)
        return gru_out

    def forward(self, x1, x2):
        lvec, rvec = self.encode(x1), self.encode(x2)
        # abs_dist = torch.abs(torch.add(lvec, -rvec))
        y = F.cosine_similarity(rvec, lvec).view(-1)
        # y = torch.sigmoid(self.hidden2label(abs_dist))
        return y

    # An alternative precomputed-encoding variant, kept from the original file:
    # def encode(self, x):
    #     bs = x.size(0)
    #     lens = [len(item) for item in x]
    #     max_len = max(lens)
    #     encodes = x
    #     # encodes = []
    #     # for i in range(self.batch_size):
    #     #     for j in range(lens[i]):
    #     #         encodes.append(x[i][j])
    #     #
    #     # encodes = self.encoder(encodes, sum(lens))
    #     seq, start, end = [], 0, 0
    #     for i in range(bs):
    #         end += lens[i]
    #         if max_len - lens[i]:
    #             seq.append(self.get_zeros(max_len - lens[i]))
    #         seq.append(encodes[start:end])
    #         start = end
    #     encodes = torch.cat(seq)
    #     encodes = encodes.view(bs, max_len, -1)
    #     # return encodes
    #
    #     # gru_out, hidden = self.bigru(encodes, self.hidden)
    #     gru_out, hidden = self.bigru(encodes)
    #     gru_out = torch.transpose(gru_out, 1, 2)
    #     # pooling
    #     gru_out = F.max_pool1d(gru_out, gru_out.size(2)).squeeze(2)
    #
    #     return gru_out
    #
    # def forward(self, x1, x2):
    #     lvec, rvec = self.encode(x1), self.encode(x2)
    #
    #     abs_dist = torch.abs(torch.add(lvec, -rvec))
    #
    #     y = torch.sigmoid(self.hidden2label(abs_dist)).view(x1.size(0), -1)
    #     t = (~(y > 0.5)).float()
    #     out = torch.cat([t, y], dim=1)
    #     return out

BIN  工具&文件/ASTNN-clone/model/model_clone_c.pkl


工具&文件/ASTNN-clone/pipeline.py  (+193 −0)

@@ -0,0 +1,193 @@
import pandas as pd
import os
import sys
import warnings
warnings.filterwarnings('ignore')


class Pipeline:
    def __init__(self, ratio, root, language):
        self.ratio = ratio
        self.root = root
        self.language = language
        self.sources = None
        self.blocks = None
        self.pairs = None
        self.train_file_path = None
        self.dev_file_path = None
        self.test_file_path = None
        self.size = None

    # parse source code
    def parse_source(self, output_file, option):
        path = self.root + self.language + '/' + output_file
        if os.path.exists(path) and option == 'existing':
            source = pd.read_pickle(path)
        else:
            if self.language == 'c':  # was "is 'c'"; identity on str literals is unreliable
                from pycparser import c_parser
                parser = c_parser.CParser()
                source = pd.read_pickle(self.root + self.language + '/programs.pkl')
                source.columns = ['id', 'code', 'label']
                source['code'] = source['code'].apply(parser.parse)
                source.to_pickle(path)
            else:
                import javalang

                def parse_program(func):
                    tokens = javalang.tokenizer.tokenize(func)
                    parser = javalang.parser.Parser(tokens)
                    tree = parser.parse_member_declaration()
                    return tree
                # source = pd.read_csv(self.root+self.language+'/bcb_funcs_all.tsv', sep='\t', header=None, encoding='utf-8')
                source = pd.read_csv(self.root + self.language + '/codes.csv')
                source.columns = ['id', 'code']
                source['code'] = source['code'].apply(parse_program)
                source.to_pickle(path)
        self.sources = source
        return source

    # create clone pairs
    def read_pairs(self, filename):
        pairs = pd.read_pickle(self.root + self.language + '/' + filename)
        self.pairs = pairs

    # split data for training, developing and testing
    def split_data(self):
        data_path = self.root + self.language + '/'
        data = self.pairs
        data_num = len(data)
        ratios = [int(r) for r in self.ratio.split(':')]
        train_split = int(ratios[0] / sum(ratios) * data_num)
        val_split = train_split + int(ratios[1] / sum(ratios) * data_num)

        data = data.sample(frac=1, random_state=666)
        train = data.iloc[:train_split]
        dev = data.iloc[train_split:val_split]
        test = data.iloc[val_split:]

        def check_or_create(path):
            if not os.path.exists(path):
                os.mkdir(path)
        train_path = data_path + 'train/'
        check_or_create(train_path)
        self.train_file_path = train_path + 'train_.pkl'
        train.to_pickle(self.train_file_path)

        dev_path = data_path + 'dev/'
        check_or_create(dev_path)
        self.dev_file_path = dev_path + 'dev_.pkl'
        dev.to_pickle(self.dev_file_path)

        test_path = data_path + 'test/'
        check_or_create(test_path)
        self.test_file_path = test_path + 'test_.pkl'
        test.to_pickle(self.test_file_path)

    # construct dictionary and train word embedding
    def dictionary_and_embedding(self, input_file, size):
        self.size = size
        data_path = self.root + self.language + '/'
        if not input_file:
            input_file = self.train_file_path
        pairs = pd.read_pickle(input_file)
        train_ids = pairs['id1'].append(pairs['id2']).unique()

        # trees = self.sources.set_index('id', drop=False).loc[train_ids]
        trees = self.sources.set_index('id', drop=False).loc[train_ids[0]]
        for i in train_ids[1:]:
            tmp_tt = self.sources.set_index('id', drop=False).loc[i]
            trees = pd.concat([trees, tmp_tt], axis=0)

        if not os.path.exists(data_path + 'train/embedding'):
            os.mkdir(data_path + 'train/embedding')
        if self.language == 'c':
            sys.path.append('../')
            from prepare_data import get_sequences as func
        else:
            from utils import get_sequence as func

        def trans_to_sequences(ast):
            sequence = []
            func(ast, sequence)
            return sequence
        corpus = trees['code'].apply(trans_to_sequences)
        str_corpus = [' '.join(c) for c in corpus]
        trees['code'] = pd.Series(str_corpus)
        # trees.to_csv(data_path+'train/programs_ns.tsv')

        from gensim.models.word2vec import Word2Vec
        w2v = Word2Vec(corpus, size=size, workers=16, sg=1, max_final_vocab=3000)
        w2v.save(data_path + 'train/embedding/node_w2v_' + str(size))

    # generate block sequences with index representations
    def generate_block_seqs(self, size):
        self.size = size
        if self.language == 'c':
            from prepare_data import get_blocks as func
        else:
            from utils import get_blocks_v1 as func
        from gensim.models.word2vec import Word2Vec
        # the embedding lives at <root>/<language>/node_w2v_<size> (see data/c/node_w2v_128
        # in this commit); the original concatenation was missing the '/'
        word2vec = Word2Vec.load(self.root + self.language + '/node_w2v_' + str(self.size)).wv
        vocab = word2vec.vocab
        max_token = word2vec.syn0.shape[0]

        def tree_to_index(node):
            token = node.token
            result = [vocab[token].index if token in vocab else max_token]
            children = node.children
            for child in children:
                result.append(tree_to_index(child))
            return result

        def trans2seq(r):
            blocks = []
            func(r, blocks)
            tree = []
            for b in blocks:
                btree = tree_to_index(b)
                tree.append(btree)
            return tree
        trees = pd.DataFrame(self.sources, copy=True)
        trees['code'] = trees['code'].apply(trans2seq)
        if 'label' in trees.columns:
            trees.drop('label', axis=1, inplace=True)
        self.blocks = trees

    # merge pairs
    def merge(self, data_path, part):
        pairs = pd.read_pickle(data_path)
        pairs['id1'] = pairs['id1'].astype(int)
        pairs['id2'] = pairs['id2'].astype(int)
        df = pd.merge(pairs, self.blocks, how='left', left_on='id1', right_on='id')
        df = pd.merge(df, self.blocks, how='left', left_on='id2', right_on='id')
        df.drop(['id_x', 'id_y'], axis=1, inplace=True)
        df.dropna(inplace=True)
        df.to_pickle(self.root + self.language + '/' + part + '/blocks.pkl')

    # run for processing data to train
    def run(self):
        print('parse source code...')
        self.parse_source(output_file='ast.pkl', option='existing')
        print('read id pairs...')
        if self.language == 'c':
            self.read_pairs('oj_clone_ids.pkl')
        else:
            self.read_pairs('id_pairs.pkl')
            # self.read_pairs('bcb_pair_ids.pkl')
        print('split data...')
        self.split_data()
        # print('train word embedding...')
        # self.dictionary_and_embedding(None, 128)
        print('generate block sequences...')
        self.generate_block_seqs(128)
        print('merge pairs and blocks...')
        self.merge(self.train_file_path, 'train')
        self.merge(self.dev_file_path, 'dev')
        self.merge(self.test_file_path, 'test')


lang = "c"
ppl = Pipeline('8:1:1', 'data/', lang)
ppl.run()

工具&文件/ASTNN-clone/prepare_data.py  (+45 −0)

@@ -0,0 +1,45 @@
from pycparser import c_parser, c_ast
import pandas as pd
import os
import re
import sys
from gensim.models.word2vec import Word2Vec
import pickle
from tree import ASTNode, SingleNode
import numpy as np


def get_sequences(node, sequence):
    current = SingleNode(node)
    sequence.append(current.get_token())
    for _, child in node.children():
        get_sequences(child, sequence)
    if current.get_token().lower() == 'compound':
        sequence.append('End')


def get_blocks(node, block_seq):
    children = node.children()
    name = node.__class__.__name__
    if name in ['FuncDef', 'If', 'For', 'While', 'DoWhile']:
        block_seq.append(ASTNode(node))
        if name != 'For':  # was "is not 'For'"; identity on str literals is unreliable
            skip = 1
        else:
            skip = len(children) - 1

        for i in range(skip, len(children)):
            child = children[i][1]
            if child.__class__.__name__ not in ['FuncDef', 'If', 'For', 'While', 'DoWhile', 'Compound']:
                block_seq.append(ASTNode(child))
            get_blocks(child, block_seq)
    elif name == 'Compound':
        block_seq.append(ASTNode(name))
        for _, child in node.children():
            if child.__class__.__name__ not in ['If', 'For', 'While', 'DoWhile']:
                block_seq.append(ASTNode(child))
            get_blocks(child, block_seq)
        block_seq.append(ASTNode('End'))
    else:
        for _, child in node.children():
            get_blocks(child, block_seq)

工具&文件/ASTNN-clone/test.py  (+90 −0)

@@ -0,0 +1,90 @@
import pandas as pd
import torch
import time
import numpy as np
import warnings
from gensim.models.word2vec import Word2Vec
from model import BatchProgramCC
from torch.autograd import Variable
from sklearn.metrics import precision_recall_fscore_support
from tqdm import tqdm
warnings.filterwarnings('ignore')


def get_batch(dataset, idx, bs):
    tmp = dataset.iloc[idx: idx + bs]
    x1, x2, labels = [], [], []
    for _, item in tmp.iterrows():
        x1.append(item['code_ids_x'])
        x2.append(item['code_ids_y'])
        labels.append([item['label']])
    return x1, x2, torch.FloatTensor(labels)


if __name__ == '__main__':
    lang = 'c'
    root = 'data/'
    test_data = pd.read_pickle(root + lang + '/test/blocks_new.pkl').sample(frac=1)

    word2vec = Word2Vec.load(root + lang + "/node_w2v_128").wv
    MAX_TOKENS = word2vec.syn0.shape[0]
    EMBEDDING_DIM = word2vec.syn0.shape[1]
    embeddings = np.zeros((MAX_TOKENS + 1, EMBEDDING_DIM), dtype="float32")
    embeddings[:word2vec.syn0.shape[0]] = word2vec.syn0

    HIDDEN_DIM = 100
    ENCODE_DIM = 128
    LABELS = 1
    EPOCHS = 5
    BATCH_SIZE = 64
    USE_GPU = True

    model = BatchProgramCC(EMBEDDING_DIM, HIDDEN_DIM, MAX_TOKENS + 1, ENCODE_DIM, LABELS, BATCH_SIZE,
                           USE_GPU, embeddings)
    if USE_GPU:
        model.cuda()

    parameters = model.parameters()
    optimizer = torch.optim.Adamax(parameters)
    loss_function = torch.nn.BCELoss()

    PATH = './model/model_clone_c.pkl'
    checkpoint = torch.load(PATH)
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['model_state_dict'])

    test_data_t = test_data
    print("Testing...")
    # testing procedure
    predicts = []
    trues = []
    total_loss = 0.0
    total = 0.0
    i = 0
    for i in tqdm(range(0, len(test_data_t), BATCH_SIZE)):
        if i + BATCH_SIZE > len(test_data_t):
            BATCH_SIZE = len(test_data_t) - i
        batch = get_batch(test_data_t, i, BATCH_SIZE)
        i += BATCH_SIZE  # no effect under the for loop; kept from the original
        test1_inputs, test2_inputs, test_labels = batch
        if USE_GPU:
            test_labels = test_labels.cuda()

        model.batch_size = len(test_labels)
        model.hidden = model.init_hidden()
        output = model(test1_inputs, test2_inputs)

        # loss = loss_function(output, Variable(test_labels))

        # calc testing acc
        predicted = (output.data > 0.5).cpu().numpy()
        predicts.extend(predicted)
        trues.extend(test_labels.cpu().numpy())
        # total += len(test_labels)
        # total_loss += loss.item() * len(test_labels)

    p, r, f, _ = precision_recall_fscore_support(trues, predicts, average='binary')
    print("Testing results(P,R,F1):%.3f, %.3f, %.3f" % (p, r, f))

工具&文件/ASTNN-clone/train.py  (+246 −0)

@@ -0,0 +1,246 @@
import pandas as pd
import torch
import time
import numpy as np
import warnings
from gensim.models.word2vec import Word2Vec
from model import BatchProgramCC
from torch.autograd import Variable
from sklearn.metrics import precision_recall_fscore_support
from tqdm import tqdm
warnings.filterwarnings('ignore')

# word2vec = Word2Vec.load("./train/embedding/node_w2v_128_new").wv
# word2vec.index2word


def get_batch(dataset, idx, bs):
    tmp = dataset.iloc[idx: idx + bs]
    x1, x2, labels = [], [], []
    for _, item in tmp.iterrows():
        x1.append(eval(item['code_ids_x']))
        x2.append(eval(item['code_ids_y']))
        labels.append([item['label']])
    return x1, x2, torch.FloatTensor(labels)


if __name__ == '__main__':
    # import argparse
    # parser = argparse.ArgumentParser(description="Choose a dataset:[c|java]")
    # parser.add_argument('--lang')
    # args = parser.parse_args()
    # args.lang = 'java'
    # if not args.lang:
    #     print("No specified dataset")
    #     exit(1)
    root = 'data/'
    lang = 'java'
    categories = 1
    if lang == 'java':
        categories = 5
    print("Train for ", str.upper(lang))
    # train_data = pd.read_pickle(root+lang+'/train/blocks_30w.pkl').sample(frac=1)
    train_data = pd.read_csv(root + lang + '/train/blocks_30w.csv').sample(frac=1)
    train_data = train_data.replace(-1, 0)
    # val_data = pd.read_pickle(root+lang+'/dev/blocks_30w.pkl').sample(frac=1)
    val_data = pd.read_csv(root + lang + '/dev/blocks_30w.csv').sample(frac=1)
    val_data = val_data.replace(-1, 0)
    # test_data = pd.read_pickle(root+lang+'/test/blocks_30w.pkl').sample(frac=1)
    test_data = pd.read_csv(root + lang + '/test/blocks_30w.csv').sample(frac=1)
    test_data = test_data.replace(-1, 0)
    test_data.loc[test_data['label'] > 0, 'label'] = 1

    word2vec = Word2Vec.load("./data/java/train/embedding/node_w2v_128_new").wv
    MAX_TOKENS = word2vec.syn0.shape[0]
    EMBEDDING_DIM = word2vec.syn0.shape[1]
    embeddings = np.zeros((MAX_TOKENS + 1, EMBEDDING_DIM), dtype="float32")
    embeddings[:word2vec.syn0.shape[0]] = word2vec.syn0

    HIDDEN_DIM = 100
    ENCODE_DIM = 128
    LABELS = 1
    EPOCHS = 10
    BATCH_SIZE = 64
    USE_GPU = True

    model = BatchProgramCC(EMBEDDING_DIM, HIDDEN_DIM, MAX_TOKENS + 1, ENCODE_DIM, LABELS, BATCH_SIZE,
                           USE_GPU, embeddings)
    if USE_GPU:
        model.cuda()

    parameters = model.parameters()
    optimizer = torch.optim.Adamax(parameters)
    loss_function = torch.nn.BCELoss()

    PATH = './model/model_clone_java_30w.pkl'
    print(train_data)
    precision, recall, f1 = 0, 0, 0
    print('Start training...')
    for t in range(5, categories + 1):
        # if lang == 'java':
        #     # train_data_t = train_data[train_data['label'].isin([t, 0])]
        #     train_data_t = train_data
        #     train_data_t.loc[train_data_t['label'] > 0, 'label'] = 1
        #     # val_data_t = val_data[val_data['label'].isin([t, 0])]
        #     val_data_t = val_data
        #     val_data_t.loc[val_data_t['label'] > 0, 'label'] = 1
        #     # test_data_t = test_data[test_data['label'].isin([t, 0])]
        #     test_data_t = test_data
        #     # test_data_t.loc[test_data_t['label'] > 0, 'label'] = 1
        # else:
        train_data_t, val_data_t, test_data_t = train_data, val_data, test_data

        # training procedure
        train_loss_ = []
        val_loss_ = []
        for epoch in range(EPOCHS):
            start_time = time.time()
            # training epoch
            total_acc = 0.0
            total_loss = 0.0
            total = 0.0
            i = 0
            predicts = []
            trues = []
            model.train()
            bs = BATCH_SIZE
            # while i < len(train_data_t):
            for i in tqdm(range(0, len(train_data_t), bs)):
                if i + bs > len(train_data_t):
                    bs = len(train_data_t) - i
                batch = get_batch(train_data_t, i, bs)
                # i += BATCH_SIZE
                train1_inputs, train2_inputs, train_labels = batch
                if USE_GPU:
                    train1_inputs, train2_inputs, train_labels = train1_inputs, train2_inputs, train_labels.cuda()

                model.zero_grad()
                model.batch_size = len(train_labels)
                model.hidden = model.init_hidden()
                output = model(train1_inputs, train2_inputs)

                loss = loss_function(output, Variable(train_labels))
                loss.backward()
                optimizer.step()

                total += len(train_labels)
                total_loss += loss.item() * len(train_labels)
                predicted = (output.data > 0.5).cpu().numpy()
                predicts.extend(predicted)
                trues.extend(train_labels.cpu().numpy())

            train_loss_.append(total_loss / total)
            precision, recall, f1, _ = precision_recall_fscore_support(trues, predicts, average='binary')

            total_loss = 0.0
            total = 0.0
            i = 0
            bs = BATCH_SIZE
            predicts = []
            trues = []
            model.eval()
            # while i < len(val_data_t):
            #     batch = get_batch(val_data_t, i, BATCH_SIZE)
            #     i += BATCH_SIZE
            for i in tqdm(range(0, len(val_data_t), bs)):
                if i + bs > len(val_data_t):
                    bs = len(val_data_t) - i
                batch = get_batch(val_data_t, i, bs)  # was BATCH_SIZE in the original; bs is the truncated size
                val1_inputs, val2_inputs, val_labels = batch
                if USE_GPU:
                    val1_inputs, val2_inputs, val_labels = val1_inputs, val2_inputs, val_labels.cuda()

                model.batch_size = len(val_labels)
                model.hidden = model.init_hidden()
                output = model(val1_inputs, val2_inputs)

                loss = loss_function(output, Variable(val_labels))
                total += len(val_labels)
                total_loss += loss.item() * len(val_labels)
                predicted = (output.data > 0.5).cpu().numpy()
                predicts.extend(predicted)
                trues.extend(val_labels.cpu().numpy())
            val_loss_.append(total_loss / total)
            precision_, recall_, f1_, _ = precision_recall_fscore_support(trues, predicts, average='binary')

            print('categories-%d [Epoch: %3d/%3d] Training Loss: %.4f, Validation Loss: %.4f,'
                  % (t, epoch + 1, EPOCHS, train_loss_[epoch], val_loss_[epoch]))
            print("Train results(P,R,F1):%.3f, %.3f, %.3f" % (precision, recall, f1))
            print("Dev results(P,R,F1):%.3f, %.3f, %.3f" % (precision_, recall_, f1_))
            torch.save({'epoch': epoch,
                        'model_state_dict': model.state_dict()
                        }, PATH)

        print("Testing-%d..." % t)
        # testing procedure
        predicts = []
        trues = []
        total_loss = 0.0
        total = 0.0
        i = 0
        while i < len(test_data_t):
            batch = get_batch(test_data_t, i, BATCH_SIZE)
            i += BATCH_SIZE
            test1_inputs, test2_inputs, test_labels = batch
            if USE_GPU:
                test_labels = test_labels.cuda()

            model.batch_size = len(test_labels)
            model.hidden = model.init_hidden()
            output = model(test1_inputs, test2_inputs)

            # loss = loss_function(output, Variable(test_labels))

            # calc testing acc
            predicted = (output.data > 0.5).cpu().numpy()
            predicts.extend(predicted)
            trues.extend(test_labels.cpu().numpy())
            # total += len(test_labels)
            # total_loss += loss.item() * len(test_labels)

        precision_, recall_, f1_, _ = precision_recall_fscore_support(trues, predicts, average='binary')
        print("Test results(P,R,F1):%.3f, %.3f, %.3f" % (precision_, recall_, f1_))

        # result = pd.DataFrame(np.array(predicts), columns=['predict'])
        # result['true'] = pd.DataFrame(np.array(trues))
        # result['label'] = pd.DataFrame(np.array(trues))
        # result.loc[result['label'] > 0, 'label'] = 1
        # weights = [0, 0.005, 0.001, 0.002, 0.010, 0.982]
        # for k in range(1, categories+1):
        #     trues_ = result[result['true'].isin([0, k])]['label'].values
        #     predicts_ = result[result['true'].isin([0, k])]['predict'].values
        #     p, r, f, _ = precision_recall_fscore_support(trues_, predicts_, average='binary')
        #     precision += weights[k] * p
        #     recall += weights[k] * r
        #     f1 += weights[k] * f
        #     print("Type-" + str(k) + ": " + str(p) + " " + str(r) + " " + str(f))
        #
        # print("Total testing results(P,R,F1):%.3f, %.3f, %.3f" % (precision, recall, f1))
        # if lang == 'java':
        #     weights = [0, 0.005, 0.001, 0.002, 0.010, 0.982]
        #     p, r, f, _ = precision_recall_fscore_support(trues, predicts, average='binary')
        #     precision += weights[t] * p
        #     recall += weights[t] * r
        #     f1 += weights[t] * f
        #     print("Type-" + str(t) + ": " + str(p) + " " + str(r) + " " + str(f))
        # else:
        #     precision, recall, f1, _ = precision_recall_fscore_support(trues, predicts, average='binary')
        #
        # print("Total testing results(P,R,F1):%.3f, %.3f, %.3f" % (precision, recall, f1))

工具&文件/ASTNN-clone/tree.py  (+170 −0)

@@ -0,0 +1,170 @@
from javalang.ast import Node


class ASTNode(object):
    def __init__(self, node):
        self.node = node
        # self.vocab = word_map
        self.is_str = isinstance(self.node, str)
        self.token = self.get_token()
        # self.index = self.token_to_index(self.token)
        self.children = self.add_children()

    def is_leaf(self):
        if self.is_str:
            return True
        return len(self.node.children()) == 0

    def get_token(self, lower=True):
        if self.is_str:
            return self.node
        name = self.node.__class__.__name__
        token = name
        is_name = False
        if self.is_leaf():
            attr_names = self.node.attr_names
            if attr_names:
                if 'names' in attr_names:
                    token = self.node.names[0]
                elif 'name' in attr_names:
                    token = self.node.name
                    is_name = True
                else:
                    token = self.node.value
            else:
                token = name
        else:
            if name == 'TypeDecl':
                token = self.node.declname
            if self.node.attr_names:
                attr_names = self.node.attr_names
                if 'op' in attr_names:
                    if self.node.op[0] == 'p':
                        token = self.node.op[1:]
                    else:
                        token = self.node.op
        if token is None:
            token = name
        if lower and is_name:
            token = token.lower()
        return token

    # def token_to_index(self, token):
    #     self.index = self.vocab[token].index if token in self.vocab else MAX_TOKENS
    #     return self.index
    #
    # def get_index(self):
    #     return self.index

    def add_children(self):
        if self.is_str:
            return []
        children = self.node.children()
        if self.token in ['FuncDef', 'If', 'While', 'DoWhile']:
            return [ASTNode(children[0][1])]
        elif self.token == 'For':
            return [ASTNode(children[c][1]) for c in range(0, len(children) - 1)]
        else:
            return [ASTNode(child) for _, child in children]


class BlockNode(object):
    def __init__(self, node):
        self.node = node
        self.is_str = isinstance(self.node, str)
        self.token = self.get_token(node)
        self.children = self.add_children()

    def is_leaf(self):
        if self.is_str:
            return True
        return len(self.node.children) == 0

    def get_token(self, node):
        if isinstance(node, str):
            token = node
        elif isinstance(node, set):
            token = 'Modifier'
        elif isinstance(node, Node):
            token = node.__class__.__name__
        else:
            token = ''
        return token

    def ori_children(self, root):
        if isinstance(root, Node):
            if self.token in ['MethodDeclaration', 'ConstructorDeclaration']:
                children = root.children[:-1]
            else:
                children = root.children
        elif isinstance(root, set):
            children = list(root)
        else:
            children = []

        def expand(nested_list):
            for item in nested_list:
                if isinstance(item, list):
                    for sub_item in expand(item):
                        yield sub_item
                elif item:
                    yield item
        return list(expand(children))

    def add_children(self):
        if self.is_str:
            return []
        logic = ['SwitchStatement', 'IfStatement', 'ForStatement', 'WhileStatement', 'DoStatement']
        children = self.ori_children(self.node)
        if self.token in logic:
            return [BlockNode(children[0])]
        elif self.token in ['MethodDeclaration', 'ConstructorDeclaration']:
            return [BlockNode(child) for child in children]
        else:
            return [BlockNode(child) for child in children if self.get_token(child) not in logic]


class SingleNode(ASTNode):
    def __init__(self, node):
        self.node = node
        self.is_str = isinstance(self.node, str)
        self.token = self.get_token()
        self.children = []

    def is_leaf(self):
        if self.is_str:
            return True
        return len(self.node.children()) == 0

    def get_token(self, lower=True):
        if self.is_str:
            return self.node
        name = self.node.__class__.__name__
        token = name
        is_name = False
        if self.is_leaf():
            attr_names = self.node.attr_names
            if attr_names:
                if 'names' in attr_names:
                    token = self.node.names[0]
                elif 'name' in attr_names:
                    token = self.node.name
                    is_name = True
                else:
                    token = self.node.value
            else:
                token = name
        else:
            if name == 'TypeDecl':
                token = self.node.declname
            if self.node.attr_names:
                attr_names = self.node.attr_names
                if 'op' in attr_names:
                    if self.node.op[0] == 'p':
                        token = self.node.op[1:]
                    else:
                        token = self.node.op
        if token is None:
            token = name
        if lower and is_name:
            token = token.lower()
        return token

工具&文件/ASTNN-clone/utils.py  (+80 −0)

@@ -0,0 +1,80 @@
import pandas as pd
import javalang
from javalang.ast import Node
from tree import ASTNode, BlockNode
import sys
sys.setrecursionlimit(10000)


def get_token(node):
    token = ''
    if isinstance(node, str):
        token = node
    elif isinstance(node, set):
        token = 'Modifier'  # node.pop()
    elif isinstance(node, Node):
        token = node.__class__.__name__
    return token


def get_children(root):
    if isinstance(root, Node):
        children = root.children
    elif isinstance(root, set):
        children = list(root)
    else:
        children = []

    def expand(nested_list):
        for item in nested_list:
            if isinstance(item, list):
                for sub_item in expand(item):
                    yield sub_item
            elif item:
                yield item
    return list(expand(children))


def get_sequence(node, sequence):
    token, children = get_token(node), get_children(node)
    sequence.append(token)
    for child in children:
        get_sequence(child, sequence)
    if token in ['ForStatement', 'WhileStatement', 'DoStatement', 'SwitchStatement', 'IfStatement']:
        sequence.append('End')


def get_blocks_v1(node, block_seq):
    name, children = get_token(node), get_children(node)
    logic = ['SwitchStatement', 'IfStatement', 'ForStatement', 'WhileStatement', 'DoStatement']
    if name in ['MethodDeclaration', 'ConstructorDeclaration']:
        block_seq.append(BlockNode(node))
        body = node.body
        for child in body:
            if get_token(child) not in logic and not hasattr(child, 'block'):
                block_seq.append(BlockNode(child))
            else:
                get_blocks_v1(child, block_seq)
    elif name in logic:
        block_seq.append(BlockNode(node))
        for child in children[1:]:
            token = get_token(child)
            if not hasattr(node, 'block') and token not in logic + ['BlockStatement']:
                block_seq.append(BlockNode(child))
            else:
                get_blocks_v1(child, block_seq)
        block_seq.append(BlockNode('End'))
    elif name == 'BlockStatement' or hasattr(node, 'block'):  # was "is 'BlockStatement'"; use ==
        block_seq.append(BlockNode(name))
        for child in children:
            if get_token(child) not in logic:
                block_seq.append(BlockNode(child))
            else:
                get_blocks_v1(child, block_seq)
    else:
        for child in children:
            get_blocks_v1(child, block_seq)

工具&文件/ASTNN-clone/work.ipynb  (+2668 −0)
File diff suppressed because it is too large


工具&文件/ASTNN-clone/说明.md  (+7 −0)

@@ -0,0 +1,7 @@
1. Process the data: python pipeline.py
- modify the split function so that everything goes to the test set
- the data lives under data/c/; the two pkl files must be replaced with new ones
- the code must be compilable
2. Run the test: python test.py
- it can run on the 126 server with cuda
- clone detection is treated as a binary classification task; the threshold can be tuned as needed (see the sketch below)
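test.py hard-codes the 0.5 cutoff on the cosine-similarity output (`output.data > 0.5`). A minimal sketch of a threshold sweep, assuming score and label arrays gathered the same way test.py gathers `predicts` and `trues`:
```
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def sweep_thresholds(scores, labels, thresholds=np.arange(0.1, 1.0, 0.1)):
    """Report P/R/F1 for each cosine-similarity cutoff; scores and labels are
    1-d arrays of model outputs and ground-truth 0/1 labels."""
    for th in thresholds:
        preds = (scores > th).astype(int)
        p, r, f, _ = precision_recall_fscore_support(labels, preds, average='binary')
        print("th=%.1f  P=%.3f R=%.3f F1=%.3f" % (th, p, r, f))
```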

工具&文件/certs/9085819__shuishan.net.cn.key  (+27 −0)

@@ -0,0 +1,27 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEA2R5MIQo+9/oysDtsH0s9xGxBCvD0dtNgMnawXmt1ZJdzAvzf
bQ3AmT2y40zLtehcqU1XZh1LXZMfqi8GFwznm7fgCM1DqAd/kOKdBCpANWgj+OvL
Kxwum4bBbmuMg4IYpXoKaf94MWcx5axBmksYrnF2D95avcGYcSxoLHNL86+KY6xJ
9rMvORs+gLpMQGrY39Cz1n7Ef/9u0LRuWRK+4LHGxP/P4lMC4FvXH2K90RhQln0j
7RR4uxwkQn57Vhqf2cimTZpouma9+/hVpiaoVdaQbvRFFVRTRLcdsdftemQIqNhc
2KV2LzsivhbFcOwsLj7jSIjcwOwR0fH7TIel7QIDAQABAoIBAARILS8voQtJ82r3
WQwK81Zm3ieFlgSr6YdFQPgzvVZ1CC8kZpGjhktfZKJH9vKI+R7bqCAa7swTJTo5
gDC/L+gpybDSv4VWVIU6eudEoAyNl7wGhnS8swydLT5sv5IuZCcLT55EjA3JX7oM
WiTdW0jBcxcgBwEcCtIckpdh1LsjD1XU6m54w/c8gyfuZcWK+ByFbVzdk5m5sco+
wJso3AXFtxx9LhG82XMJW5BKuQfJH2bNI5YoSEGe1fzddZ+ugoCm7iNo4xf83JMT
9fUBlYxSlBa16lOaA9gwZgQEze7PEfzFKLQLFYTBDp7/QL02+TXAf4/ZdcXUnENs
8PrABS8CgYEA8Yr91FdDmxQrXwy8S2CYI3JuWt7LqqUipxduz6xyi6dZHlqXslAn
ndCVp6ihwryrzWSk2KltRaQIdbVTq7YukLQ0uKk/xneaeOh5RC9PclpGivzXNRAX
cEbro0pz/htPQ9ZVTVuvKhOpDclz1qxhhxR29I/oxU6NkBfGJf3G9TcCgYEA5h0Q
QHbkcyp8uAyXXXtbJTzAWkvKYWVZ2wwOF4MJ7l670BZef26IpY368zjmjKgNNK07
WV4FmFIuZje2Wb9jW3gV5qRysrmlb8Vzks7GEUKLSQcoPvVsiDyh8VvW0qEirH3o
rGpJeQBrrot9DirmQ+ehDKjGl1b1z3X+/XohD/sCgYBsX/2lsYW+5hzTp+YwN+Xr
OaO0F/Tv2uoiaIwql+hJKsv8p48azYYI9BbBxBLYSkkXfgnMwLArp/63uaUSDUr1
WDWziRT5Wp6vkzcd1dBisYinQezZfR/XG6sMeBJ1OBGnkVpyvClqyql2ayYTcwLL
Ve5Nqug45xbzSQd58lS7nwKBgQDGKkUynrCBlRcukHlRoceOO9ESca5pTZSiFLGW
AdztkFuBCaJ7bz7yA2EXT+sLOjWVJZG0lkmPMUaP9G5uv2ja/oEtzHSSAVm89Xdf
9/2OI5Y7X5SDE2tRr5Vuer53SRjJHuzeffGj6H7TI4CgUMVXuQNyGW5cKiEpdd4P
f7s1PQKBgFrezClBhd8c72+Q3hZZKoK5VpbqZXMcfcRoObpQ4W2OTY2+jqrSwhOO
12fWIG473Ok0pnTNxAsO4IdhKpWlXySMDwxS0Rns6TAcDnJa4sCahKnqIoMAqSTA
VUT/kwEUPat2/zlUhfOl4LooLAW36GDC/nc2urj2uVopdwdOTwVW
-----END RSA PRIVATE KEY-----

工具&文件/certs/9085819__shuishan.net.cn.pem  (+76 −0)

@@ -0,0 +1,76 @@
-----BEGIN CERTIFICATE-----
MIIHoTCCBYmgAwIBAgIQDOVX1N5YbGyIxnUGCW2zsDANBgkqhkiG9w0BAQsFADBc
MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xNDAyBgNVBAMT
K1JhcGlkU1NMIEdsb2JhbCBUTFMgUlNBNDA5NiBTSEEyNTYgMjAyMiBDQTEwHhcN
MjMwMTAyMDAwMDAwWhcNMjQwMTEyMjM1OTU5WjAcMRowGAYDVQQDDBEqLnNodWlz
aGFuLm5ldC5jbjCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANkeTCEK
Pvf6MrA7bB9LPcRsQQrw9HbTYDJ2sF5rdWSXcwL8320NwJk9suNMy7XoXKlNV2Yd
S12TH6ovBhcM55u34AjNQ6gHf5DinQQqQDVoI/jryyscLpuGwW5rjIOCGKV6Cmn/
eDFnMeWsQZpLGK5xdg/eWr3BmHEsaCxzS/OvimOsSfazLzkbPoC6TEBq2N/Qs9Z+
xH//btC0blkSvuCxxsT/z+JTAuBb1x9ivdEYUJZ9I+0UeLscJEJ+e1Yan9nIpk2a
aLpmvfv4VaYmqFXWkG70RRVUU0S3HbHX7XpkCKjYXNildi87Ir4WxXDsLC4+40iI
3MDsEdHx+0yHpe0CAwEAAaOCA50wggOZMB8GA1UdIwQYMBaAFPCchf2in32PyWi7
1dSJTR2+05D/MB0GA1UdDgQWBBQ8TpzGYRl5Mcx4zZ8subB5HviPVTAtBgNVHREE
JjAkghEqLnNodWlzaGFuLm5ldC5jboIPc2h1aXNoYW4ubmV0LmNuMA4GA1UdDwEB
/wQEAwIFoDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwgZ8GA1UdHwSB
lzCBlDBIoEagRIZCaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL1JhcGlkU1NMR2xv
YmFsVExTUlNBNDA5NlNIQTI1NjIwMjJDQTEuY3JsMEigRqBEhkJodHRwOi8vY3Js
NC5kaWdpY2VydC5jb20vUmFwaWRTU0xHbG9iYWxUTFNSU0E0MDk2U0hBMjU2MjAy
MkNBMS5jcmwwPgYDVR0gBDcwNTAzBgZngQwBAgEwKTAnBggrBgEFBQcCARYbaHR0
cDovL3d3dy5kaWdpY2VydC5jb20vQ1BTMIGHBggrBgEFBQcBAQR7MHkwJAYIKwYB
BQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBRBggrBgEFBQcwAoZFaHR0
cDovL2NhY2VydHMuZGlnaWNlcnQuY29tL1JhcGlkU1NMR2xvYmFsVExTUlNBNDA5
NlNIQTI1NjIwMjJDQTEuY3J0MAkGA1UdEwQCMAAwggGABgorBgEEAdZ5AgQCBIIB
cASCAWwBagB3AHb/iD8KtvuVUcJhzPWHujS0pM27KdxoQgqf5mdMWjp0AAABhXFA
a8wAAAQDAEgwRgIhAI++QoPxPN2iOrxIQegcdgwWNzFPnZRoDFKXpBRKMBtlAiEA
vw/HkDuckkDkfKvtFp1VxeS7GyaetlhEjQOK6ixcuP8AdgBIsONr2qZHNA/lagL6
nTDrHFIBy1bdLIHZu7+rOdiEcwAAAYVxQGuxAAAEAwBHMEUCIQDhcg/4dci0YtzM
59uvgT4+2W780D6oRtCcX0IofxpnKwIgMliXM53/OAYXc0cpaKeotuoQE5ntDMCX
FfojCPe3G9IAdwA7U3d1Pi25gE6LMFsG/kA7Z9hPw/THvQANLXJv4frUFwAAAYVx
QGuvAAAEAwBIMEYCIQDvjMHZOQZQ08BLD5/XAHJ6Sw4HaEVwyd+lFpYHLi24vwIh
AJO6f0RX/rG56cKjHWV/mQsRH94kxJDy7EjzU89uAV0XMA0GCSqGSIb3DQEBCwUA
A4ICAQAq1H2pr19LU6VnkZGhGlIklQJJ4lhXfX6ciEP+9MSgbUyTTeohv0nUYl1i
+8ehjD9yBnujUKgjxQZ3KsPn9LSgukvTU1WOEBWw05qywBg9UQ4nnc4tkghSVk35
YhJum5L6Xxr0U7ohnffFaFn07+Am/q0HlGtHUvSsrN2yh3idAupQmRWp3sLQl+LR
VL/ynq2InSGoNcawFiIKd84CJMoHMyXW24iIop044yBvRl6v5DI74j6RUUno75rI
G3HK1NUfREBeKGV7s7cTFYbR+bBFuIURHs05nGeHy+xHxFh7CwhY2Bg1Do8Mbqzb
EAVV5yOvizkNqaVULcGg1+KEU92doK625dQ7iWqGLnX5gqFEAQaUgIX0MEgD4SDR
kr73k5aEKvxCR2y89+7ieHyZM3sFX9SoCn8Az/WaNwNInqaE7uewodi+mKr7AQNH
OoipoFvc5v7uZNnt+Ixv8VBB66jhNMYZ4YijXMpdqNYLerMVlsTZoavkaznkdQW3
jRKcjG35gN21vyKtao0tQC7CZpwGJMqKluDTU6qY8NbvCKEyRUKBH6FKh3FSj8tg
t4zEnE+XLsKys3NNuDMhA+q+MCSmBE5rqz1l4z7O2a8UQ6vKc9fSULWTK4qJuSgq
gkhh6LksuplrqG7E6yXHfRNMBuVQiMwgwATiRySDNuOvHJPaWw==
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIFyzCCBLOgAwIBAgIQCgWbJfVLPYeUzGYxR3U4ozANBgkqhkiG9w0BAQsFADBh
MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD
QTAeFw0yMjA1MDQwMDAwMDBaFw0zMTExMDkyMzU5NTlaMFwxCzAJBgNVBAYTAlVT
MRcwFQYDVQQKEw5EaWdpQ2VydCwgSW5jLjE0MDIGA1UEAxMrUmFwaWRTU0wgR2xv
YmFsIFRMUyBSU0E0MDk2IFNIQTI1NiAyMDIyIENBMTCCAiIwDQYJKoZIhvcNAQEB
BQADggIPADCCAgoCggIBAKY5PJhwCX2UyBb1nelu9APen53D5+C40T+BOZfSFaB0
v0WJM3BGMsuiHZX2IHtwnjUhLL25d8tgLASaUNHCBNKKUlUGRXGztuDIeXb48d64
k7Gk7u7mMRSrj+yuLSWOKnK6OGKe9+s6oaVIjHXY+QX8p2I2S3uew0bW3BFpkeAr
LBCU25iqeaoLEOGIa09DVojd3qc/RKqr4P11173R+7Ub05YYhuIcSv8e0d7qN1sO
1+lfoNMVfV9WcqPABmOasNJ+ol0hAC2PTgRLy/VZo1L0HRMr6j8cbR7q0nKwdbn4
Ar+ZMgCgCcG9zCMFsuXYl/rqobiyV+8U37dDScAebZTIF/xPEvHcmGi3xxH6g+dT
CjetOjJx8sdXUHKXGXC9ka33q7EzQIYlZISF7EkbT5dZHsO2DOMVLBdP1N1oUp0/
1f6fc8uTDduELoKBRzTTZ6OOBVHeZyFZMMdi6tA5s/jxmb74lqH1+jQ6nTU2/Mma
hGNxUuJpyhUHezgBA6sto5lNeyqc+3Cr5ehFQzUuwNsJaWbDdQk1v7lqRaqOlYjn
iomOl36J5txTs0wL7etCeMRfyPsmc+8HmH77IYVMUOcPJb+0gNuSmAkvf5QXbgPI
Zursn/UYnP9obhNbHc/9LYdQkB7CXyX9mPexnDNO7pggNA2jpbEarLmZGi4grMmf
AgMBAAGjggGCMIIBfjASBgNVHRMBAf8ECDAGAQH/AgEAMB0GA1UdDgQWBBTwnIX9
op99j8lou9XUiU0dvtOQ/zAfBgNVHSMEGDAWgBQD3lA1VtFMu2bwo+IbG8OXsj3R
VTAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMC
MHYGCCsGAQUFBwEBBGowaDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuZGlnaWNl
cnQuY29tMEAGCCsGAQUFBzAChjRodHRwOi8vY2FjZXJ0cy5kaWdpY2VydC5jb20v
RGlnaUNlcnRHbG9iYWxSb290Q0EuY3J0MEIGA1UdHwQ7MDkwN6A1oDOGMWh0dHA6
Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbFJvb3RDQS5jcmwwPQYD
VR0gBDYwNDALBglghkgBhv1sAgEwBwYFZ4EMAQEwCAYGZ4EMAQIBMAgGBmeBDAEC
AjAIBgZngQwBAgMwDQYJKoZIhvcNAQELBQADggEBAAfjh/s1f5dDdfm0sNm74/dW
MbbsxfYV1LoTpFt+3MSUWvSbiPQfUkoV57b5rutRJvnPP9mSlpFwcZ3e1nSUbi2o
ITGA7RCOj23I1F4zk0YJm42qAwJIqOVenR3XtyQ2VR82qhC6xslxtNf7f2Ndx2G7
Mem4wpFhyPDT2P6UJ2MnrD+FC//ZKH5/ERo96ghz8VqNlmL5RXo8Ks9rMr/Ad9xw
Y4hyRvAz5920myUffwdUqc0SvPlFnahsZg15uT5HkK48tHR0TLuLH8aRpzh4KJ/Y
p0sARNb+9i1R4Fg5zPNvHs2BbIve0vkwxAy+R4727qYzl3027w9jEFC6HMXRaDc=
-----END CERTIFICATE-----

工具&文件/jupyter-image-mladder/base/Dockerfile  (+43 −0)

@@ -0,0 +1,43 @@
FROM continuumio/miniconda3:4.12.0

RUN apt-get update \
    && apt-get -y upgrade \
    && apt-get -y install zip curl \
    && apt-get install -y \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /var/lib/apt/lists/*

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN conda install -y jupyter notebook

COPY notebook.html /tmp/
RUN rm /opt/conda/lib/python3.9/site-packages/notebook/templates/notebook.html \
    && mv /tmp/notebook.html /opt/conda/lib/python3.9/site-packages/notebook/templates/

ARG NB_USER="jupyter"
ARG NB_UID="1000"
ARG NB_GID="100"
ARG NB_PORT=8888
EXPOSE ${NB_PORT}

ENV SHELL=/bin/bash \
    NB_USER="${NB_USER}" \
    NB_UID=${NB_UID} \
    NB_GID=${NB_GID} \
    PYTHONPATH=$PYTHONPATH:/opt/conda/bin
ENV PATH="${PYTHONPATH}:${PATH}"

RUN useradd -l -m -s /bin/bash -N -u "${NB_UID}" "${NB_USER}" \
    && mkdir /home/${NB_USER}/.jupyter
COPY jupyter_notebook_config.py /home/${NB_USER}/.jupyter/
RUN chmod 777 /home/${NB_USER}/.jupyter/jupyter_notebook_config.py \
    && chmod -R 777 /home/${NB_USER}
ADD enterpoint.sh /enterpoint.sh

工具&文件/jupyter-image-mladder/base/enterpoint.sh  (+7 −0)

@@ -0,0 +1,7 @@
#! /bin/bash
# dir_name=`ls /home/jupyter`
config_str="\nc.NotebookApp.notebook_dir = \"/home/jupyter\""
echo -e ${config_str} >> /home/jupyter/.jupyter/jupyter_notebook_config.py
jupyter notebook --ip=0.0.0.0

Some files were not shown because too many files changed in this diff
