import ast
from collections import Counter

import pandas as pd


# ---------------------------------------------------------------------------
# Compute per-row element frequencies and sort them
# ---------------------------------------------------------------------------
def _parse_cell(cell):
    """Return the list of elements held by one DataFrame cell.

    A ``list`` cell is returned as-is.  A ``str`` cell is parsed with
    ``ast.literal_eval``; if it cannot be parsed, or parses to something
    other than a list/tuple/set, an empty list is returned.  Any other cell
    type yields an empty list.
    """
    if isinstance(cell, str):
        try:
            parsed = ast.literal_eval(cell)
        except (ValueError, TypeError, SyntaxError):
            return []
        # A literal such as "5" parses fine but is not a collection.
        return list(parsed) if isinstance(parsed, (list, tuple, set)) else []
    return cell if isinstance(cell, list) else []


def statistical_frequency(combined_df):
    """Add a ``frequency`` column with per-row element ratios.

    For every row, each column's cell is turned into a list of elements
    (see ``_parse_cell``) and de-duplicated, so one column contributes an
    element at most once.  The number of columns containing each element is
    divided by the number of columns, and the resulting mapping is ordered
    from the highest ratio to the lowest.

    If a ``'zhenshi'`` column is present it is excluded from the computation
    and re-attached as the last column of the result.

    Args:
        combined_df: DataFrame whose cells are lists or string
            representations of lists.

    Returns:
        A new DataFrame (the input is not modified) with the extra
        ``frequency`` column holding one ``dict`` per row.

    Raises:
        TypeError: if the elements of a cell are unhashable or cannot be
            ordered against each other.
    """
    has_zhenshi = 'zhenshi' in combined_df.columns
    if has_zhenshi:
        zhenshi = combined_df['zhenshi']
        combined_df = combined_df.drop(columns=['zhenshi'])
    else:
        # Work on a copy so the caller's frame is never mutated.
        combined_df = combined_df.copy()

    # NOTE(review): an earlier comment said the denominator should be the
    # total element count, but the code has always divided by the number of
    # columns; that behaviour is kept here -- confirm which one is intended.
    column_count = len(combined_df.columns)

    percentages_list = []
    for _, row in combined_df.iterrows():
        all_beam = []
        for cell in row:
            # Sorting keeps the tie order of equal ratios deterministic.
            all_beam.extend(sorted(set(_parse_cell(cell))))

        frequencies = Counter(all_beam)
        if column_count > 0:
            percentage = {
                key: count / column_count
                for key, count in frequencies.items()
            }
        else:
            # Still emit one entry per row so the new column lines up.
            percentage = {}

        percentages_list.append(
            dict(sorted(percentage.items(),
                        key=lambda item: item[1],
                        reverse=True))
        )

    combined_df['frequency'] = percentages_list
    if has_zhenshi:
        combined_df['zhenshi'] = zhenshi
    return combined_df


# ---------------------------------------------------------------------------
# Look up keys by value
# ---------------------------------------------------------------------------
def keys_valus(tgt_dict, key):
    """Return every key of ``tgt_dict`` whose value equals ``key``.

    Args:
        tgt_dict: mapping to search.
        key: the *value* to look for (the parameter name is historical).

    Returns:
        A list of matching keys in the mapping's iteration order; empty if
        nothing matches.
    """
    return [k for k, v in tgt_dict.items() if v == key]


# Convert tensors to nutrient names
def medium(b, tgt_dict):
    """Map each value in ``b`` back to its key(s) in ``tgt_dict``.

    Args:
        b: iterable of values to look up.
        tgt_dict: mapping whose keys are returned for matching values.

    Returns:
        A flat list of the matching keys, in the order of ``b``, with the
        placeholder keys ``''`` and ``'blank'`` removed.
    """
    # NOTE(review): the original filter list contained the empty string
    # twice; possibly a distinct placeholder was lost -- confirm.
    ignored = ('', 'blank')

    names = []
    for value in b:
        names.extend(keys_valus(tgt_dict, value))
    return [name for name in names if name not in ignored]