first add

This commit is contained in:
2024-06-10 09:09:10 +08:00
parent 52d5f402bc
commit 8f9fac8bd8
14 changed files with 2042 additions and 0 deletions

345
LLM.ipynb Normal file
View File

@@ -0,0 +1,345 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "074cbeba",
"metadata": {},
"outputs": [],
"source": [
"import uproot\n",
"import attrs\n",
"from typing import List\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b49ac144",
"metadata": {},
"outputs": [],
"source": [
"# Read every branch of the \"tree\" TTree in ./fast.root into `a` (library=\"pd\" requests pandas output).\n",
"with uproot.open(\"./fast.root\") as f:\n",
" tree = f[\"tree\"]\n",
" a = tree.arrays(library=\"pd\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c2d94275",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['btag', 'ctag', 'gen_match', 'genpart_eta', 'genpart_phi',\n",
" 'genpart_pid', 'genpart_pt', 'is_signal', 'jet_energy', 'jet_eta',\n",
" 'jet_nparticles', 'jet_phi', 'jet_pt', 'part_charge', 'part_d0err',\n",
" 'part_d0val', 'part_deta', 'part_dphi', 'part_dzerr', 'part_dzval',\n",
" 'part_energy', 'part_pid', 'part_pt', 'part_px', 'part_py', 'part_pz',\n",
" 'part_isChargedHadron', 'part_isChargedKaon', 'part_isElectron',\n",
" 'part_isKLong', 'part_isKShort', 'part_isMuon', 'part_isNeutralHadron',\n",
" 'part_isPhoton', 'part_isPi0', 'part_isPion', 'part_isProton',\n",
" 'label_b', 'label_bb', 'label_bbar', 'label_c', 'label_cbar',\n",
" 'label_cc', 'label_d', 'label_dbar', 'label_g', 'label_gg', 'label_s',\n",
" 'label_sbar', 'label_u', 'label_ubar'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# List the column (branch) names of the loaded table.\n",
"print(a.keys())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "93a76758",
"metadata": {},
"outputs": [],
"source": [
"import attrs\n",
"import uproot\n",
"import numpy as np\n",
"from typing import List\n",
"@attrs.define\n",
"class ParticleBase:\n",
"    \"\"\"Features of one particle (constituent) inside a jet.\n",
"\n",
"    The loading loop in this cell fills the derived fields with natural\n",
"    logarithms (np.log) and np.tanh; none of them use a base-10 logarithm.\n",
"    \"\"\"\n",
"\n",
"    part_charge: float = attrs.field()  # charge of the particle (stored as a float, e.g. -1.0 / 0.0 / 1.0)\n",
"    part_energy: float = attrs.field()  # energy of the particle\n",
"    part_px: float = attrs.field()  # x-component of the momentum vector\n",
"    part_py: float = attrs.field()  # y-component of the momentum vector\n",
"    part_pz: float = attrs.field()  # z-component of the momentum vector\n",
"    log_energy: float = attrs.field()  # ln(part_energy)\n",
"    log_pt: float = attrs.field()  # ln(part_pt)\n",
"    part_deta: float = attrs.field()  # eta offset of the particle (presumably relative to the jet axis; confirm)\n",
"    part_dphi: float = attrs.field()  # phi offset of the particle (presumably relative to the jet axis; confirm)\n",
"    part_logptrel: float = attrs.field()  # ln(pt(particle) / pt(jet))\n",
"    part_logerel: float = attrs.field()  # ln(energy(particle) / energy(jet))\n",
"    part_deltaR: float = attrs.field()  # hypot(part_deta, part_dphi)\n",
"    part_d0: float = attrs.field()  # tanh(part_d0val)\n",
"    part_dz: float = attrs.field()  # tanh(part_dzval)\n",
"    particle_type: str = attrs.field()  # category name chosen by jud_type (e.g. 'Electron', 'Pion')\n",
"    particle_pid: int = attrs.field()  # integer id of particle_type, as assigned by jud_type\n",
"\n",
"@attrs.define\n",
"class Jet:\n",
"    \"\"\"One jet: its truth labels, jet-level kinematics and its particles.\"\"\"\n",
"\n",
"    jet_b: bool = attrs.field()  # truth label copied from label_b (true when the jet is a b jet)\n",
"    jet_bbar: bool = attrs.field()  # truth label copied from label_bbar (true when the jet is a bbar jet)\n",
"    jet_energy: float = attrs.field()  # energy of the jet\n",
"    jet_pt: float = attrs.field()  # transverse momentum of the jet\n",
"    jet_eta: float = attrs.field()  # pseudorapidity of the jet\n",
"    particles: List[ParticleBase] = attrs.field(factory=list)  # particles in the jet; a fresh list per instance\n",
"\n",
"    def __len__(self) -> int:\n",
"        \"\"\"Return the number of particles stored in this jet.\"\"\"\n",
"        return len(self.particles)\n",
" \n",
"@attrs.define\n",
"class JetSet:\n",
"    \"\"\"Container holding a list of Jet objects.\"\"\"\n",
"\n",
"    jets: List[Jet] = attrs.field()  # the jets, in the order they were appended\n",
"\n",
"    def __len__(self) -> int:\n",
"        \"\"\"Return how many jets the set holds.\"\"\"\n",
"        jet_count = len(self.jets)\n",
"        return jet_count\n",
" \n",
"# Category names in flag order; the position of a name is its integer id.\n",
"_PARTICLE_TYPE_NAMES = ('NeutralHadron', 'Photon', 'Electron', 'Muon', 'Pion', 'ChargedKaon', 'Proton')\n",
"\n",
"\n",
"def jud_type(jtmp):\n",
"    \"\"\"Decide the category of one particle from its part_is* flags.\n",
"\n",
"    Args:\n",
"        jtmp: sequence of flags ordered as NeutralHadron, Photon, Electron,\n",
"            Muon, Pion, ChargedKaon, Proton.\n",
"\n",
"    Returns:\n",
"        A (name, pid) tuple for the first position holding the largest flag.\n",
"        When no flag is set (or jtmp is empty) the particle matches none of\n",
"        the known categories and ('Others', 7) is returned instead of being\n",
"        silently reported as 'NeutralHadron'.\n",
"    \"\"\"\n",
"    flags = list(jtmp)  # accept tuples / arrays as well as lists\n",
"    max_element = max(flags, default=0)\n",
"    if not max_element:\n",
"        return 'Others', len(_PARTICLE_TYPE_NAMES)\n",
"    idx = flags.index(max_element)\n",
"    return _PARTICLE_TYPE_NAMES[idx], idx\n",
" \n",
"# Load every branch of the \"tree\" TTree into `a` (one row per jet).\n",
"with uproot.open(\"./data/data_fast/fast_bb.root\") as f:\n",
"    tree = f[\"tree\"]\n",
"    a = tree.arrays(library=\"pd\")\n",
"# `a` holds many jets; the goal is to decide, for each jet, whether it is a b or a bbar jet.\n",
"# Each jet contains many particles. Columns whose name starts with part_ are per-jet\n",
"# sequences: part_pt stores the pt of every particle in one jet, part_energy stores\n",
"# the energy of every particle in one jet, and so on.\n",
"\n",
"\n",
"jet_list = []\n",
"for j in a.itertuples():\n",
"    part_pt = np.array(j.part_pt)\n",
"    jet_pt = np.array(j.jet_pt)\n",
"    # ln(pt(particle) / pt(jet)); np.log is the natural logarithm.\n",
"    part_logptrel = np.log(np.divide(part_pt, jet_pt))\n",
"\n",
"    part_energy = np.array(j.part_energy)\n",
"    jet_energy = np.array(j.jet_energy)\n",
"    # ln(energy(particle) / energy(jet))\n",
"    part_logerel = np.log(np.divide(part_energy, jet_energy))\n",
"\n",
"    part_deta = np.array(j.part_deta)\n",
"    part_dphi = np.array(j.part_dphi)\n",
"    part_deltaR = np.hypot(part_deta, part_dphi)\n",
"\n",
"    assert len(j.part_pt) == len(j.part_energy) == len(j.part_deta)\n",
"\n",
"    # Classify every particle of this jet; the flag order must match the\n",
"    # category order expected by jud_type.\n",
"    part_type = []\n",
"    part_pid = []\n",
"    for pn in range(len(j.part_pt)):\n",
"        jtmp = [j.part_isNeutralHadron[pn], j.part_isPhoton[pn], j.part_isElectron[pn], j.part_isMuon[pn], j.part_isPion[pn],\n",
"                j.part_isChargedKaon[pn], j.part_isProton[pn]]\n",
"        tmp_type, tmp_pid = jud_type(jtmp)\n",
"        part_type.append(tmp_type)\n",
"        part_pid.append(tmp_pid)\n",
"\n",
"    # One tuple per particle, in the same order as the ParticleBase fields.\n",
"    bag = zip(j.part_charge, j.part_energy, j.part_px, j.part_py, j.part_pz, np.log(j.part_energy),\n",
"              np.log(j.part_pt), j.part_deta, j.part_dphi, part_logptrel, part_logerel, part_deltaR,\n",
"              np.tanh(j.part_d0val), np.tanh(j.part_dzval), part_type, part_pid)\n",
"\n",
"    # Turn every particle of this jet into a ParticleBase, so that `particles`\n",
"    # holds the information of all particles in the current jet.\n",
"    particles = [\n",
"        ParticleBase(\n",
"            part_charge=c,\n",
"            part_energy=en,\n",
"            part_px=px,\n",
"            part_py=py,\n",
"            part_pz=pz,\n",
"            log_energy=lEn,\n",
"            log_pt=lPt,\n",
"            part_deta=eta,\n",
"            part_dphi=phi,\n",
"            part_logptrel=ii,\n",
"            part_logerel=jj,\n",
"            part_deltaR=kk,\n",
"            part_d0=d0,\n",
"            part_dz=dz,\n",
"            particle_type=ptype,  # category name returned by jud_type\n",
"            particle_pid=pid,  # category id returned by jud_type\n",
"        )\n",
"        for c, en, px, py, pz, lEn, lPt, eta, phi, ii, jj, kk, d0, dz, ptype, pid in bag\n",
"    ]\n",
"\n",
"    # Assemble the jet record.\n",
"    jet = Jet(\n",
"        jet_b=j.label_b,  # label_b = 1 if this jet is a b jet, otherwise 0\n",
"        jet_bbar=j.label_bbar,  # label_bbar = 1 if this jet is a bbar jet\n",
"        jet_energy=j.jet_energy,  # energy of this jet\n",
"        jet_pt=j.jet_pt,  # pt of this jet\n",
"        jet_eta=j.jet_eta,  # eta of this jet (an angular coordinate)\n",
"        particles=particles,  # all particles of this jet\n",
"    )\n",
"    jet_list.append(jet)\n",
"\n",
"jet_set1 = JetSet(jets=jet_list)\n",
"\n",
"# As described above, every jet has many particles; the model input is, for all\n",
"# particles of each jet, the following fields:\n",
"# log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0 part_dz\n",
"# part_deta part_dphi particle_pid"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9995622",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b94329b2",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the bbar label stored on the first jet.\n",
"jet_set1.jets[0].jet_bbar"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4242ce6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.0\n",
"-1.0\n",
"1.0\n",
"0.0\n",
"-1.0\n",
"1.0\n",
"-1.0\n",
"0.0\n",
"-1.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"1.0\n",
"0.0\n",
"0.0\n",
"0.0\n"
]
}
],
"source": [
"# Print the charge of every particle in the first jet, one per line.\n",
"for particle in jet_set1.jets[0].particles:\n",
"    print(particle.part_charge)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6f0dc08e",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-1.0\n",
"1.0\n",
"-1.0\n",
"-1.0\n",
"0.0\n",
"1.0\n",
"-1.0\n",
"1.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"0.0\n",
"-1.0\n",
"0.0\n",
"0.0\n"
]
}
],
"source": [
"# Print the charge of every particle in the second jet, one per line.\n",
"for particle in jet_set1.jets[1].particles:\n",
"    print(particle.part_charge)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e6809f05",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10000\n"
]
}
],
"source": [
"# Number of jets that were loaded (JetSet.__len__ counts its jets).\n",
"print(len(jet_set1))"
]
},
{
"cell_type": "markdown",
"id": "67cd5f19",
"metadata": {},
"source": [
"## 与particle-transformer对比的实验设计\n",
"\n",
"使用以下属性与 particle-transformer 进行准确率对比:bb 100w,bbbar 100w。\n",
"\n",
"log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0 part_dz part_deta part_dphi particle_pid"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}