Files
particle_analyse/LLM.ipynb
2024-06-10 09:09:10 +08:00

346 lines
11 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "074cbeba",
"metadata": {},
"outputs": [],
"source": [
"import uproot\n",
"import attrs\n",
"from typing import List\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b49ac144",
"metadata": {},
"outputs": [],
"source": [
"# Open ./fast.root and read its \"tree\" entry into `a` (library=\"pd\" asks uproot for pandas output).\n",
"# NOTE: `a` and `tree` are assigned again further down, from ./data/data_fast/fast_bb.root.\n",
"with uproot.open(\"./fast.root\") as f:\n",
" tree = f[\"tree\"]\n",
" a = tree.arrays(library=\"pd\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c2d94275",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['btag', 'ctag', 'gen_match', 'genpart_eta', 'genpart_phi',\n",
" 'genpart_pid', 'genpart_pt', 'is_signal', 'jet_energy', 'jet_eta',\n",
" 'jet_nparticles', 'jet_phi', 'jet_pt', 'part_charge', 'part_d0err',\n",
" 'part_d0val', 'part_deta', 'part_dphi', 'part_dzerr', 'part_dzval',\n",
" 'part_energy', 'part_pid', 'part_pt', 'part_px', 'part_py', 'part_pz',\n",
" 'part_isChargedHadron', 'part_isChargedKaon', 'part_isElectron',\n",
" 'part_isKLong', 'part_isKShort', 'part_isMuon', 'part_isNeutralHadron',\n",
" 'part_isPhoton', 'part_isPi0', 'part_isPion', 'part_isProton',\n",
" 'label_b', 'label_bb', 'label_bbar', 'label_c', 'label_cbar',\n",
" 'label_cc', 'label_d', 'label_dbar', 'label_g', 'label_gg', 'label_s',\n",
" 'label_sbar', 'label_u', 'label_ubar'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# List the column names of the loaded table.\n",
"print(a.columns)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "93a76758",
"metadata": {},
"outputs": [],
"source": [
"import attrs\n",
"import uproot\n",
"import numpy as np\n",
"from typing import List\n",
"@attrs.define\n",
"class ParticleBase:\n",
"    \"\"\"Feature record for a single particle belonging to a jet.\n",
"\n",
"    Every field is required. The derived values are computed by the code that\n",
"    builds the instances; this class only stores them.\n",
"    \"\"\"\n",
"\n",
"    part_charge: int  # charge of the particle\n",
"    part_energy: float  # energy of the particle\n",
"    part_px: float  # x-component of the momentum vector\n",
"    part_py: float  # y-component of the momentum vector\n",
"    part_pz: float  # z-component of the momentum vector\n",
"    log_energy: float  # logarithm of part_energy (this notebook fills it with np.log, i.e. natural log)\n",
"    log_pt: float  # logarithm of part_pt (np.log, natural log)\n",
"    part_deta: float  # eta offset of the particle (presumably relative to the jet axis - confirm)\n",
"    part_dphi: float  # phi offset of the particle (presumably relative to the jet axis - confirm)\n",
"    part_logptrel: float  # np.log(pt(particle) / pt(jet))\n",
"    part_logerel: float  # np.log(energy(particle) / energy(jet))\n",
"    part_deltaR: float  # np.hypot(part_deta, part_dphi)\n",
"    part_d0: float  # tanh of the part_d0val column\n",
"    part_dz: float  # tanh of the part_dzval column\n",
"    particle_type: str  # type name chosen by jud_type (e.g. 'Pion', 'Photon')\n",
"    particle_pid: int  # integer code that jud_type pairs with particle_type\n",
"\n",
"@attrs.define\n",
"class Jet:\n",
"    \"\"\"One jet: its b / bbar labels, jet-level kinematics and its particles.\"\"\"\n",
"\n",
"    jet_b: float  # label_b of the jet\n",
"    jet_bbar: float  # label_bbar of the jet\n",
"    jet_energy: float  # energy of the jet\n",
"    jet_pt: float  # transverse momentum of the jet\n",
"    jet_eta: float  # pseudorapidity of the jet\n",
"    particles: List[ParticleBase] = attrs.field(factory=list)  # particles in the jet; a fresh empty list by default\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of particles stored in this jet.\"\"\"\n",
"        particle_count = len(self.particles)\n",
"        return particle_count\n",
" \n",
"@attrs.define\n",
"class JetSet:\n",
"    \"\"\"Container holding a list of Jet objects.\"\"\"\n",
"\n",
"    jets: List[Jet]  # required; the jets that make up the set\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of jets in the set.\"\"\"\n",
"        jet_count = len(self.jets)\n",
"        return jet_count\n",
" \n",
"def jud_type(jtmp):\n",
"    \"\"\"Map one particle's type flags to a ``(type name, integer code)`` pair.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    jtmp : iterable\n",
"        The seven flags of one particle, ordered NeutralHadron, Photon,\n",
"        Electron, Muon, Pion, ChargedKaon, Proton.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(name, code)`` for the first flag holding the largest value; codes\n",
"        0-6 follow the order above. If no flag is set the particle is reported\n",
"        as ``('Others', 7)`` instead of being mislabelled as a neutral hadron.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``jtmp`` is empty (raised by ``max``).\n",
"    \"\"\"\n",
"    names = ('NeutralHadron', 'Photon', 'Electron', 'Muon', 'Pion', 'ChargedKaon', 'Proton')\n",
"    flags = list(jtmp)  # accept any iterable, not only objects with .index()\n",
"    strongest = max(flags)\n",
"    if not strongest:\n",
"        # Every flag is 0/False: without this guard index() would return 0\n",
"        # and the particle would silently count as a NeutralHadron.\n",
"        return 'Others', len(names)\n",
"    idx = flags.index(strongest)\n",
"    # The integer code is the position in `names`.\n",
"    return names[idx], idx\n",
" \n",
"# Read the \"tree\" entry of ./data/data_fast/fast_bb.root into `a`, replacing any earlier value of `a`.\n",
"with uproot.open(\"./data/data_fast/fast_bb.root\") as f:\n",
" tree = f[\"tree\"]\n",
" a = tree.arrays(library=\"pd\")\n",
"# `a` contains many jets; the goal is to decide for each jet whether it is b or bbar. Every jet contains many particles. The variables below that start with part_ are lists, e.g. part_pt stores the pt of all particles in one jet,\n",
"# and part_energy stores the energy of all particles in one jet.\n",
"\n",
"\n",
"jet_list = []\n",
"for j in a.itertuples():\n",
"    # Each row of `a` is one jet; its part_* fields hold one entry per particle.\n",
"    n_part = len(j.part_pt)\n",
"    # Check that the per-particle columns line up before combining them.\n",
"    assert n_part == len(j.part_energy) == len(j.part_deta)\n",
"\n",
"    part_pt = np.array(j.part_pt)\n",
"    jet_pt = np.array(j.jet_pt)\n",
"    part_logptrel = np.log(np.divide(part_pt, jet_pt))  # natural log of pt(particle) / pt(jet)\n",
"\n",
"    part_energy = np.array(j.part_energy)\n",
"    jet_energy = np.array(j.jet_energy)\n",
"    part_logerel = np.log(np.divide(part_energy, jet_energy))  # natural log of energy(particle) / energy(jet)\n",
"\n",
"    part_deta = np.array(j.part_deta)\n",
"    part_dphi = np.array(j.part_dphi)\n",
"    part_deltaR = np.hypot(part_deta, part_dphi)\n",
"\n",
"    # Classify every particle from its flags, in the order jud_type expects.\n",
"    part_type = []\n",
"    part_pid = []\n",
"    for pn in range(n_part):\n",
"        jtmp = [j.part_isNeutralHadron[pn], j.part_isPhoton[pn], j.part_isElectron[pn], j.part_isMuon[pn], j.part_isPion[pn],\n",
"                j.part_isChargedKaon[pn], j.part_isProton[pn]]\n",
"        tmp_type, tmp_pid = jud_type(jtmp)\n",
"        part_type.append(tmp_type)\n",
"        part_pid.append(tmp_pid)\n",
"\n",
"    # strict=True (Python 3.10+) raises ValueError if any column has a different\n",
"    # length, instead of silently dropping the trailing particles.\n",
"    bag = zip(j.part_charge, j.part_energy, j.part_px, j.part_py, j.part_pz, np.log(j.part_energy),\n",
"              np.log(j.part_pt), j.part_deta, j.part_dphi, part_logptrel, part_logerel, part_deltaR,\n",
"              np.tanh(j.part_d0val), np.tanh(j.part_dzval), part_type, part_pid, strict=True)\n",
"\n",
"    # Loop over all particles of this jet, store each one as a ParticleBase and\n",
"    # append it to `particles`, which then holds every particle of the jet.\n",
"    particles = []\n",
"    for c, en, px, py, pz, lEn, lPt, eta, phi, ii, jj, kk, d0, dz, ptype, pid in bag:\n",
"        particles.append(ParticleBase(\n",
"            part_charge=c,\n",
"            part_energy=en,\n",
"            part_px=px,\n",
"            part_py=py,\n",
"            part_pz=pz,\n",
"            log_energy=lEn,\n",
"            log_pt=lPt,\n",
"            part_deta=eta,\n",
"            part_dphi=phi,\n",
"            part_logptrel=ii,\n",
"            part_logerel=jj,\n",
"            part_deltaR=kk,\n",
"            part_d0=d0,\n",
"            part_dz=dz,\n",
"            particle_type=ptype,  # name returned by jud_type\n",
"            particle_pid=pid  # integer code returned by jud_type\n",
"        ))\n",
"    # Build the jet record and collect it.\n",
"    jet = Jet(\n",
"        jet_b=j.label_b,  # label_b = 1 if this jet is b, otherwise 0\n",
"        jet_bbar=j.label_bbar,  # label_bbar = 1 if this jet is bbar\n",
"        jet_energy=j.jet_energy,  # energy of this jet\n",
"        jet_pt=j.jet_pt,  # pt of this jet\n",
"        jet_eta=j.jet_eta,  # eta of this jet (a way of expressing an angle)\n",
"        particles=particles  # all particles of this jet\n",
"    )\n",
"    jet_list.append(jet)\n",
"\n",
"# Wrap all collected jets in a JetSet.\n",
"jet_set1 = JetSet(jets=jet_list)\n",
"\n",
"# As described above, each jet contains many particles; what is finally fed to the model is the following information for all particles of each jet:\n",
"# log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0 part_dz \n",
"# part_deta part_dphi particle_pid"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9995622",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b94329b2",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the bbar label stored on the first jet.\n",
"jet_set1.jets[0].jet_bbar"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4242ce6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.0\n",
"-1.0\n",
"1.0\n",
"0.0\n",
"-1.0\n",
"1.0\n",
"-1.0\n",
"0.0\n",
"-1.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"1.0\n",
"0.0\n",
"0.0\n",
"0.0\n"
]
}
],
"source": [
"# Print the charge of every particle in the first jet, one value per line.\n",
"for particle in jet_set1.jets[0].particles:\n",
"    print(particle.part_charge)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6f0dc08e",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-1.0\n",
"1.0\n",
"-1.0\n",
"-1.0\n",
"0.0\n",
"1.0\n",
"-1.0\n",
"1.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"-1.0\n",
"0.0\n",
"0.0\n"
]
}
],
"source": [
"# Print the charge of every particle in the second jet, one value per line.\n",
"for particle in jet_set1.jets[1].particles:\n",
"    print(particle.part_charge)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e6809f05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10000\n"
]
}
],
"source": [
"# Report how many jets were loaded (JetSet.__len__ counts its jets).\n",
"print(len(jet_set1))"
]
},
{
"cell_type": "markdown",
"id": "67cd5f19",
"metadata": {},
"source": [
"## 与particle-transformer对比的实验设计\n",
"\n",
"使用这些属性与 particle-transformer 进行准确率对比(bb 100w,bbbar 100w)。\n",
"\n",
"log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0 part_dz part_deta part_dphi particle_pid"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}