Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

203 linhas
8.9 KiB

há 3 anos
  1. import sys
  2. sys.path.append('../')
  3. import tensorflow as tf
  4. try:
  5. import tensorflow.python.keras as keras
  6. from tensorflow.python.keras import layers
  7. import tensorflow.python.keras.backend as K
  8. except:
  9. import tensorflow.keras as keras
  10. from tensorflow.keras import layers
  11. import tensorflow.keras.backend as K
  12. from typing import Optional
  13. from PIPE.config import Config
  14. from PIPE.keras_attention_layer import Attention_layer
  15. class Code2VecModel():
  16. def __init__(self,config: Config):
  17. self.keras_train_model: Optional[keras.Model] = None
  18. self.config = config
  19. ##################################################搭建模型结构函数########################################################
  20. def _create_keras_model(self):
  21. path_source_token_input = layers.Input((self.config.MAX_CONTEXTS,), dtype=tf.int32)
  22. path_input = layers.Input((self.config.MAX_CONTEXTS,), dtype=tf.int32)
  23. path_target_token_input = layers.Input((self.config.MAX_CONTEXTS,), dtype=tf.int32)
  24. context_valid_mask = layers.Input((self.config.MAX_CONTEXTS,))
  25. # path embedding layer
  26. # (None, max_contents) -> (None,max_contents,path_embedding_size)
  27. paths_embedded = layers.Embedding(
  28. self.config.path_vocab_size, self.config.PATH_EMBEDDINGS_SIZE, name = 'path_embedding'
  29. )(path_input)
  30. # terminal embedding layer
  31. # (None, max_contents) -> (None,max_contents,token_embedding_size)
  32. token_embedding_shared_layer = layers.Embedding(
  33. self.config.token_vocab_size, self.config.TOKEN_EMBEDDINGS_SIZE, name = 'token_embedding'
  34. )
  35. path_source_token_embedded = token_embedding_shared_layer(path_source_token_input)
  36. path_target_token_embedded = token_embedding_shared_layer(path_target_token_input)
  37. # concatenate layer: paths -> [source, path, target]
  38. # [3 * (None,max_contents, token_embedding_size)] -> (None, max_contents,3*embedding_size)
  39. context_embedded = layers.Concatenate()([path_source_token_embedded, paths_embedded, path_target_token_embedded])
  40. context_embedded = layers.Dropout(1 - self.config.DROPOUT_KEEP_RATE)(context_embedded)
  41. # Dense layer: (None,max_contents,3*embedding_size) -> (None,max_contents, code_vector_size)
  42. context_after_dense = layers.TimeDistributed(
  43. layers.Dense(self.config.CODE_VECTOR_SIZE, use_bias=False, activation='tanh')
  44. )(context_embedded)
  45. # attention layer: (None, max_contents,code_vector_size) -> (None,code_vector_size)
  46. code_vectors, attention_weights = Attention_layer(name='attention')(
  47. [context_after_dense, context_valid_mask]
  48. )
  49. """
  50. C2AE分类器进行分类的模型
  51. """
  52. Fx = layers.Dense(
  53. 186 , use_bias=True,activation=None,name='Fx'
  54. )(code_vectors)
  55. Fx_relu = tf.tanh(Fx)
  56. Fx_dropout = layers.Dropout(0.5)(Fx_relu)
  57. targets_input = layers.Input((self.config.categories,), dtype=tf.float32)
  58. targets_hidden = layers.Dense(
  59. 186, use_bias=True,activation=None,name='targets_hidden'
  60. )(targets_input)
  61. targets_hidden_relu = tf.tanh(targets_hidden)
  62. targets_hidden_dropout = layers.Dropout(0.5)(targets_hidden_relu)
  63. targets_output = layers.Dense(
  64. 186, use_bias=True,activation=None,name='targets_embedding'
  65. )(targets_hidden_dropout)
  66. targets_output_relu = tf.tanh(targets_output)
  67. targets_output_dropout = layers.Dropout(0.5)(targets_output_relu)
  68. targets_loss = layers.subtract([targets_output_dropout,Fx_dropout],name='targets_loss')
  69. Fd1 = layers.Dense(
  70. 186, use_bias=True,activation=None,name='Fd1'
  71. )(Fx_dropout)
  72. Fd_relu1 = tf.tanh(Fd1)
  73. Fd_dropout1 = layers.Dropout(0.5)(Fd_relu1)
  74. Fd = layers.Dense(
  75. 186, use_bias=True, activation=None, name='Fd'
  76. )(Fd_dropout1)
  77. Fd_relu = tf.tanh(Fd)
  78. Fd_dropout = layers.Dropout(0.5)(Fd_relu)
  79. target_index_3 = layers.Dense(
  80. self.config.categories, use_bias=True,activation='sigmoid',name='target_index_3'
  81. )(Fd_dropout)
  82. inputs = [path_source_token_input, path_input, path_target_token_input, context_valid_mask, targets_input]
  83. self.keras_train_model = keras.Model(inputs = inputs, outputs = [target_index_3,targets_loss])
  84. print("------------------create_keras_model Done.-------------------------")
  85. @classmethod
  86. def _create_optimizer(self):
  87. return tf.keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
  88. def _comile_keras_model(self, optimizer=None):
  89. if optimizer is None:
  90. optimizer = self._create_optimizer()
  91. ################################评估指标##############################
  92. def exactMatch(y_true, y_pred):
  93. y_pred1 = y_pred
  94. row_dif = K.cast(K.sum(K.round(K.clip(y_true * (1 - y_pred1) + (1-y_true) * y_pred1,0,1)), axis=1) > K.epsilon(),'float32')
  95. dif = K.sum(K.round(row_dif))
  96. row_equ = K.cast(K.abs(K.sum(K.round(K.clip(y_true * y_pred1 + (1-y_true) * (1 - y_pred1),0,1)), axis=1) - self.config.categories) < K.epsilon(),'float32')
  97. equ = K.sum(K.round(row_equ))
  98. return equ / (equ + dif + K.epsilon())
  99. def micro_getPrecsion(y_true, y_pred):
  100. TP = tf.reduce_sum(y_true * tf.round(y_pred))
  101. TN = tf.reduce_sum((1 - y_true) * (1 - tf.round(y_pred)))
  102. FP = tf.reduce_sum((1 - y_true) * tf.round(y_pred))
  103. FN = tf.reduce_sum(y_true * (1 - tf.round(y_pred)))
  104. precision = TP / (TP + FP)
  105. return precision
  106. def micro_getRecall(y_true, y_pred):
  107. TP = tf.reduce_sum(y_true * tf.round(y_pred))
  108. TN = tf.reduce_sum((1 - y_true) * (1 - tf.round(y_pred)))
  109. FP = tf.reduce_sum((1 - y_true) * tf.round(y_pred))
  110. FN = tf.reduce_sum(y_true * (1 - tf.round(y_pred)))
  111. precision = TP / (TP + FP)
  112. recall = TP / (TP + FN)
  113. return recall
  114. # F1-score评价指标
  115. def micro_F1score(y_true, y_pred):
  116. TP = tf.reduce_sum(y_true * tf.round(y_pred))
  117. TN = tf.reduce_sum((1 - y_true) * (1 - tf.round(y_pred)))
  118. FP = tf.reduce_sum((1 - y_true) * tf.round(y_pred))
  119. FN = tf.reduce_sum(y_true * (1 - tf.round(y_pred)))
  120. precision = TP / (TP + FP)
  121. recall = TP / (TP + FN)
  122. F1score = 2 * precision * recall / (precision + recall)
  123. return F1score
  124. def macro_getPrecison(y_true, y_pred):
  125. col_TP = K.sum(y_true * K.round(y_pred), axis=0)
  126. col_TN = K.sum((1 - y_true) * (1 - K.round(y_pred)), axis=0)
  127. col_FP = K.sum((1 - y_true) * K.round(y_pred), axis=0)
  128. col_FN = K.sum(y_true * (1 - K.round(y_pred)), axis=0)
  129. precsion = K.mean(col_TP / (col_TP + col_FP + K.epsilon()))
  130. return precsion
  131. def macro_getRecall(y_true, y_pred):
  132. # print(y_true)
  133. row_TP = K.sum(y_true * K.round(y_pred), axis=0)
  134. row_TN = K.sum((1 - y_true) * (1 - K.round(y_pred)), axis=0)
  135. row_FP = K.sum((1 - y_true) * K.round(y_pred), axis=0)
  136. row_FN = K.sum(y_true * (1 - K.round(y_pred)), axis=0)
  137. recall = K.mean(row_TP / (row_TP + row_FN + K.epsilon()))
  138. return recall
  139. def macro_getF1score(y_true, y_pred):
  140. precision = macro_getPrecison(y_true, y_pred)
  141. recall = macro_getRecall(y_true, y_pred)
  142. F1score = 2 * precision * recall / (precision + recall)
  143. return F1score
  144. """
  145. C2AE损失函数
  146. custom_loss:
  147. target_loss的平方和,y_true是随机设的
  148. a_cross_loss:
  149. """
  150. def custom_loss(y_true,y_pred):
  151. return 1*tf.reduce_mean(tf.square(y_pred))
  152. def a_cross_loss(y_true, y_pred):
  153. cross_loss = tf.add(tf.log(1e-10 + y_pred) * y_true, tf.log(1e-10 + (1 - y_pred)) * (1 - y_true))
  154. cross_entropy_label = -1 * tf.reduce_mean(tf.reduce_sum(cross_loss, 1))
  155. return 0.1*cross_entropy_label
  156. self.keras_train_model.compile(
  157. loss = {'target_index_3':a_cross_loss,'targets_loss':custom_loss},
  158. optimizer=optimizer,
  159. metrics={'target_index_3':[exactMatch, micro_getPrecsion, micro_getRecall,micro_F1score,macro_getPrecison,macro_getRecall,macro_getF1score]}
  160. )
  161. if __name__ == "__main__":
  162. config = Config(set_defaults=True, load_from_args=True, verify=False)
  163. model = Code2VecModel(config)
  164. model._create_keras_model()