import torch
from torch import device


# Flattened, Dynamo-style capture of one BERT-like text-encoder layer (layer 4,
# preceded by the LayerNorm that closes layer 3). Module parameters arrive as
# explicit forward() arguments, and every reference is dropped (set to None)
# as soon as it is dead so the underlying tensors can be freed.
class GraphModule(torch.nn.Module):
    def forward(
        self,
        add_22,
        extended_attention_mask_2,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_3_modules_output_modules_layer_norm_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_3_modules_output_modules_layer_norm_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_layer_norm_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_layer_norm_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_layer_norm_parameters_bias_,
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_layer_norm_parameters_weight_,
    ):
        # Closing LayerNorm of the previous layer's (layer 3) output block.
        hidden_states_66 = torch.nn.functional.layer_norm(
            add_22,
            (32,),
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_3_modules_output_modules_layer_norm_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_3_modules_output_modules_layer_norm_parameters_bias_,
            1e-12,
        )
        add_22 = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_3_modules_output_modules_layer_norm_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_3_modules_output_modules_layer_norm_parameters_bias_ = None
        # Self-attention: Q/K/V projections, each reshaped to
        # (batch=2, heads=4, seq, head_dim=8) via view + transpose.
        linear_44 = torch.nn.functional.linear(
            hidden_states_66,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_,
        )
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_ = None
        view_16 = linear_44.view(2, -1, 4, 8)
        linear_44 = None
        query_layer_4 = view_16.transpose(1, 2)
        view_16 = None
        linear_45 = torch.nn.functional.linear(
            hidden_states_66,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_,
        )
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_ = None
        view_17 = linear_45.view(2, -1, 4, 8)
        linear_45 = None
        key_layer_4 = view_17.transpose(1, 2)
        view_17 = None
        linear_46 = torch.nn.functional.linear(
            hidden_states_66,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_,
        )
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_ = None
        view_18 = linear_46.view(2, -1, 4, 8)
        linear_46 = None
        value_layer_4 = view_18.transpose(1, 2)
        view_18 = None
        # Attention scores, scaled by sqrt(head_dim) = sqrt(8) = 2.8284271247461903.
        transpose_25 = key_layer_4.transpose(-1, -2)
        key_layer_4 = None
        attention_scores_22 = torch.matmul(query_layer_4, transpose_25)
        query_layer_4 = transpose_25 = None
        attention_scores_23 = attention_scores_22 / 2.8284271247461903
        attention_scores_22 = None
        # Deliberately inject NaN into the scores: 0/0 produces NaN.
        eps = torch.tensor(1e-8, device=attention_scores_23.device)
        nan_val = (eps - eps) / (eps - eps)
        attention_scores_23 = attention_scores_23 + nan_val
        nan_val = None
        # Move the additive attention mask to cuda:0 and apply it.
        to_8 = extended_attention_mask_2.to(device(type="cuda", index=0))
        extended_attention_mask_2 = None
        attention_scores_24 = attention_scores_23 + to_8
        attention_scores_23 = to_8 = None
        # Internal telemetry call captured in the trace; it returns None.
        _log_api_usage_once_4 = torch._C._log_api_usage_once("python.nn_module")
        _log_api_usage_once_4 = None
        attention_probs_14 = torch.nn.functional.softmax(
            attention_scores_24, -1, _stacklevel=5
        )
        attention_scores_24 = None
        # Dropout with p=0.0 and training=False is a no-op.
        attention_probs_dropped_4 = torch.nn.functional.dropout(
            attention_probs_14, 0.0, False, False
        )
        attention_probs_14 = None
        # Weighted sum over values, then back to (batch=2, seq=14, hidden=32).
        context_layer_22 = torch.matmul(attention_probs_dropped_4, value_layer_4)
        attention_probs_dropped_4 = value_layer_4 = None
        permute_14 = context_layer_22.permute(0, 2, 1, 3)
        context_layer_22 = None
        context_layer_23 = permute_14.contiguous()
        permute_14 = None
        context_layer_24 = context_layer_23.view(2, 14, 32)
        context_layer_23 = None
        # Attention output projection, no-op dropout, and residual connection.
        hidden_states_67 = torch.nn.functional.linear(
            context_layer_24,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_,
        )
        context_layer_24 = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_ = None
        hidden_states_68 = torch.nn.functional.dropout(
            hidden_states_67, 0.0, False, False
        )
        hidden_states_67 = None
        add_24 = hidden_states_68 + hidden_states_66
        hidden_states_68 = hidden_states_66 = None
        # Post-attention LayerNorm.
        hidden_states_69 = torch.nn.functional.layer_norm(
            add_24,
            (32,),
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_layer_norm_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_layer_norm_parameters_bias_,
            1e-12,
        )
        add_24 = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_layer_norm_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_layer_norm_parameters_bias_ = None
        # Feed-forward network: intermediate dense followed by GELU.
        hidden_states_70 = torch.nn.functional.linear(
            hidden_states_69,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_,
        )
        l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_ = None
        hidden_states_71 = torch.nn.functional.gelu(hidden_states_70)
        hidden_states_70 = None
        hidden_states_72 = torch.nn.functional.linear(
            hidden_states_71,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_,
        )
        hidden_states_71 = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_ = None
        hidden_states_73 = torch.nn.functional.dropout(
            hidden_states_72, 0.0, False, False
        )
        hidden_states_72 = None
        # Second deliberate NaN injection (0.0 / 0.0 -> NaN), into the FFN output.
        nan_val = torch.tensor(0.0, device=hidden_states_73.device) / torch.tensor(
            0.0, device=hidden_states_73.device
        )
        hidden_states_73 = hidden_states_73 + nan_val
        nan_val = None
        add_25 = hidden_states_73 + hidden_states_69
        hidden_states_73 = hidden_states_69 = None
        # Final residual LayerNorm; the graph returns a single-element tuple.
        hidden_states_74 = torch.nn.functional.layer_norm(
            add_25,
            (32,),
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_layer_norm_parameters_weight_,
            l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_layer_norm_parameters_bias_,
            1e-12,
        )
        add_25 = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_layer_norm_parameters_weight_ = l_l_self_modules_text_model_modules_encoder_modules_layer_modules_4_modules_output_modules_layer_norm_parameters_bias_ = None
        return (hidden_states_74,)
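

# A minimal smoke test, not part of the captured graph. Shapes are inferred
# from the view() calls above (batch 2, seq len 14, hidden 32, 4 heads x 8
# dims); the FFN width of 128 (4x hidden) and the (2, 1, 1, 14) extended-mask
# shape are assumptions, not recovered from the trace. CUDA is required
# because the graph hard-codes a move to cuda:0.
if __name__ == "__main__" and torch.cuda.is_available():
    dev = torch.device("cuda", 0)
    hidden, intermediate = 32, 128

    def ln_params():
        # Identity LayerNorm affine: weight = 1, bias = 0.
        return torch.ones(hidden, device=dev), torch.zeros(hidden, device=dev)

    def dense_params(out_features, in_features):
        # Small random weights, zero bias.
        return (
            0.02 * torch.randn(out_features, in_features, device=dev),
            torch.zeros(out_features, device=dev),
        )

    ln3_w, ln3_b = ln_params()
    attn_dense_w, attn_dense_b = dense_params(hidden, hidden)
    attn_ln_w, attn_ln_b = ln_params()
    k_w, k_b = dense_params(hidden, hidden)
    q_w, q_b = dense_params(hidden, hidden)
    v_w, v_b = dense_params(hidden, hidden)
    inter_w, inter_b = dense_params(intermediate, hidden)
    out_w, out_b = dense_params(hidden, intermediate)
    out_ln_w, out_ln_b = ln_params()

    # Arguments follow the forward() signature: inputs first, then the
    # flattened parameters in alphabetical order (bias before weight).
    (result,) = GraphModule()(
        torch.randn(2, 14, hidden, device=dev),  # add_22
        torch.zeros(2, 1, 1, 14, device=dev),  # extended_attention_mask_2
        ln3_b, ln3_w,
        attn_dense_b, attn_dense_w,
        attn_ln_b, attn_ln_w,
        k_b, k_w, q_b, q_w, v_b, v_w,
        inter_b, inter_w,
        out_b, out_w,
        out_ln_b, out_ln_w,
    )
    # The injected 0/0 terms should flood the output with NaNs.
    assert torch.isnan(result).any()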