@@ -35,7 +35,6 @@ def process_feature_annotations(features_file, repo_dir, flattened_keywords, tax
3535
3636 library_features = set ()
3737
38- api_id = 1 # Initialize API ID for tagging
3938 for source in data .get ('sources' , []):
4039 for feature in source .get ('files' , []):
4140 file_path = os .path .join (repo_dir , feature .get ('path' , '' ))
@@ -55,8 +54,7 @@ def process_feature_annotations(features_file, repo_dir, flattened_keywords, tax
5554
5655 if feature_names and line_index < len (lines ):
5756 for feature_name in feature_names :
58- tag = f"API_{ api_id } _{ feature_name } _{ method_name } "
59- api_id += 1
57+ tag = f"APIMatch|{ feature_name } |{ method_name } "
6058 line_annotations [line_index ].add (tag )
6159 library_features .add (tag )
6260 if add_to_fm (fm , taxonomy , feature_name , tag ) is None :
@@ -174,7 +172,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
174172 # Search only non-comment, non-test, non-HAnS-annotated lines
175173 keywords_found = {}
176174 for category , subcategory , keyword in flattened_keywords :
177- if re .search (rf"\b{ re . escape ( keyword ) } \b" , cleaned_line ):
175+ if re .search (rf"\b{ keyword } \b" , cleaned_line , re . IGNORECASE ):
178176 key = f"{ category } : { subcategory } "
179177 if key not in keywords_found :
180178 keywords_found [key ] = []
@@ -226,14 +224,23 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
226224
227225def determine_feature (pos_counter , matches , line_number , fm ):
228226 features = ''
229- for match in list (matches [line_number ]["Keywords Found" ].keys ()):
227+ for match in list (matches [line_number ]["Keywords Found" ].items ()):
230228 if len (features ) > 0 :
231229 features += ', '
232- path = match .split (' : ' )
230+ path = match [ 0 ] .split (' : ' )
233231 length = len (path )
234- feature = 'KeywordMatch_' + str (pos_counter [0 ]) + '_' + path [length - 1 ]
235- pos_counter [0 ] += 1
236- features += feature
232+ value = (
233+ match [1 ][0 ]
234+ .replace ('[' , '' )
235+ .replace (']' , '' )
236+ .replace ('(' , '' )
237+ .replace (')' , '' )
238+ .replace ('*' , '' )
239+ .replace ('?' , '' )
240+ )
241+
242+ feature_name = f'KeywordMatch|{ path [length - 1 ]} |{ value } '
243+ features += feature_name
237244
238245 current = fm
239246 i = 0
@@ -248,7 +255,14 @@ def determine_feature(pos_counter, matches, line_number, fm):
248255 if not found :
249256 current = Feature (name , current )
250257 i += 1
251- Feature (feature , current )
258+
259+ exists = False
260+ for f in current .sub_features :
261+ if f .name == feature_name :
262+ exists = True
263+ break
264+ if not exists :
265+ Feature (feature_name , current )
252266 return features , fm
253267
254268
0 commit comments