346 lines
11 KiB
Plaintext
346 lines
11 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "074cbeba",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import uproot\n",
|
||
"import attrs\n",
|
||
"from typing import List\n",
|
||
"import numpy as np"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "b49ac144",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Open ./fast.root, take the object stored under the key 'tree' and read its arrays\n",
"# with library='pd' (pandas output); `a` is what the following cell inspects.\n",
"with uproot.open(\"./fast.root\") as f:\n",
"    tree = f[\"tree\"]\n",
"    a = tree.arrays(library=\"pd\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "c2d94275",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index(['btag', 'ctag', 'gen_match', 'genpart_eta', 'genpart_phi',\n",
|
||
" 'genpart_pid', 'genpart_pt', 'is_signal', 'jet_energy', 'jet_eta',\n",
|
||
" 'jet_nparticles', 'jet_phi', 'jet_pt', 'part_charge', 'part_d0err',\n",
|
||
" 'part_d0val', 'part_deta', 'part_dphi', 'part_dzerr', 'part_dzval',\n",
|
||
" 'part_energy', 'part_pid', 'part_pt', 'part_px', 'part_py', 'part_pz',\n",
|
||
" 'part_isChargedHadron', 'part_isChargedKaon', 'part_isElectron',\n",
|
||
" 'part_isKLong', 'part_isKShort', 'part_isMuon', 'part_isNeutralHadron',\n",
|
||
" 'part_isPhoton', 'part_isPi0', 'part_isPion', 'part_isProton',\n",
|
||
" 'label_b', 'label_bb', 'label_bbar', 'label_c', 'label_cbar',\n",
|
||
" 'label_cc', 'label_d', 'label_dbar', 'label_g', 'label_gg', 'label_s',\n",
|
||
" 'label_sbar', 'label_u', 'label_ubar'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# List the column labels of the loaded table.\n",
"print(a.columns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "93a76758",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import attrs\n",
|
||
"import uproot\n",
|
||
"import numpy as np\n",
|
||
"from typing import List\n",
|
"@attrs.define\n",
"class ParticleBase:\n",
"    \"\"\"Feature record for a single particle inside a jet.\n",
"\n",
"    The jet-building loop in this notebook fills these fields; it computes\n",
"    every logarithm with ``np.log``.\n",
"    \"\"\"\n",
"\n",
"    part_charge: int  # charge of the particle\n",
"    part_energy: float  # energy of the particle\n",
"    part_px: float  # momentum vector, x-component\n",
"    part_py: float  # momentum vector, y-component\n",
"    part_pz: float  # momentum vector, z-component\n",
"    log_energy: float  # log(part_energy)\n",
"    log_pt: float  # log(part_pt)\n",
"    part_deta: float  # pseudorapidity term from the part_deta column (presumably relative to the jet - TODO confirm)\n",
"    part_dphi: float  # azimuthal-angle term from the part_dphi column (presumably relative to the jet - TODO confirm)\n",
"    part_logptrel: float  # log(pt(particle) / pt(jet))\n",
"    part_logerel: float  # log(energy(particle) / energy(jet))\n",
"    part_deltaR: float  # distance between the particle and the jet: hypot(part_deta, part_dphi)\n",
"    part_d0: float  # tanh of the part_d0val column\n",
"    part_dz: float  # tanh of the part_dzval column\n",
"    particle_type: str  # particle category name (e.g. charged kaon, charged pion, proton, electron, muon, neutral hadron, photon, others)\n",
"    particle_pid: int  # integer code of the category (e.g. 0,1,2,3,4,5,6,7)\n",
|
||
"\n",
|
"@attrs.define\n",
"class Jet:\n",
"    \"\"\"A single jet: its b / bbar labels, kinematics and constituent particles.\"\"\"\n",
"\n",
"    jet_b: float  # b label of the jet\n",
"    jet_bbar: float  # bbar label of the jet\n",
"    jet_energy: float  # energy of the jet\n",
"    jet_pt: float  # transverse momentum of the jet\n",
"    jet_eta: float  # pseudorapidity of the jet\n",
"    # Particles belonging to the jet; each instance gets its own empty list by default.\n",
"    particles: List[ParticleBase] = attrs.field(factory=list)\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of particles stored in this jet.\"\"\"\n",
"        n_particles = len(self.particles)\n",
"        return n_particles\n",
|
||
" \n",
|
"@attrs.define\n",
"class JetSet:\n",
"    \"\"\"Container holding a list of Jet objects.\"\"\"\n",
"\n",
"    jets: List[Jet]  # required; no default is provided\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of jets in the set.\"\"\"\n",
"        n_jets = len(self.jets)\n",
"        return n_jets\n",
|
||
" \n",
|
"def jud_type(jtmp):\n",
"    \"\"\"Determine the category of one particle from its type flags.\n",
"\n",
"    Args:\n",
"        jtmp: Iterable of seven flags ordered as NeutralHadron, Photon,\n",
"            Electron, Muon, Pion, ChargedKaon, Proton.\n",
"\n",
"    Returns:\n",
"        A ``(name, pid)`` tuple for the first category holding the largest\n",
"        flag, or ``('Others', 7)`` when no flag is set.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``jtmp`` is empty (raised by ``max``).\n",
"    \"\"\"\n",
"    particle_dict = {'NeutralHadron': 0, 'Photon': 1, 'Electron': 2, 'Muon': 3,\n",
"                     'Pion': 4, 'ChargedKaon': 5, 'Proton': 6}\n",
"    flags = list(jtmp)  # accept any iterable, not only objects with .index()\n",
"    max_element = max(flags)\n",
"    # With every flag at zero the old code picked index 0 and reported the\n",
"    # particle as NeutralHadron; use the catch-all category instead (the\n",
"    # ParticleBase field comments list an 'others' type and pids 0-7).\n",
"    if not max_element > 0:\n",
"        return 'Others', 7\n",
"    idx = flags.index(max_element)  # ties resolve to the earliest category\n",
"    name, pid = list(particle_dict.items())[idx]\n",
"    return name, pid\n",
|
||
" \n",
|
"with uproot.open(\"./data/data_fast/fast_bb.root\") as f:\n",
"    tree = f[\"tree\"]\n",
"    a = tree.arrays(library=\"pd\")\n",
"\n",
"# `a` holds many jets; the goal is to decide for each jet whether it is b or bbar.\n",
"# Every jet contains many particles, so each column whose name starts with `part_`\n",
"# is a list: part_pt stores the pt of all particles in one jet, part_energy stores\n",
"# the energy of all particles in one jet.\n",
"\n",
"jet_list = []\n",
"for j in a.itertuples():\n",
"    # Check the per-particle column lengths before the columns are consumed.\n",
"    assert len(j.part_pt) == len(j.part_energy) == len(j.part_deta)\n",
"\n",
"    # Log of each particle's pt relative to the jet pt (np.log is the natural log).\n",
"    part_pt = np.array(j.part_pt)\n",
"    jet_pt = np.array(j.jet_pt)\n",
"    part_logptrel = np.log(np.divide(part_pt, jet_pt))\n",
"\n",
"    # Log of each particle's energy relative to the jet energy.\n",
"    part_energy = np.array(j.part_energy)\n",
"    jet_energy = np.array(j.jet_energy)\n",
"    part_logerel = np.log(np.divide(part_energy, jet_energy))\n",
"\n",
"    # deltaR = sqrt(deta**2 + dphi**2)\n",
"    part_deta = np.array(j.part_deta)\n",
"    part_dphi = np.array(j.part_dphi)\n",
"    part_deltaR = np.hypot(part_deta, part_dphi)\n",
"\n",
"    # Classify every particle from its flag columns; the flag order here must match\n",
"    # the category order used inside jud_type.\n",
"    part_type = []\n",
"    part_pid = []\n",
"    for pn in range(len(j.part_pt)):\n",
"        jtmp = [j.part_isNeutralHadron[pn], j.part_isPhoton[pn], j.part_isElectron[pn], j.part_isMuon[pn],\n",
"                j.part_isPion[pn], j.part_isChargedKaon[pn], j.part_isProton[pn]]\n",
"        tmp_type, tmp_pid = jud_type(jtmp)\n",
"        part_type.append(tmp_type)\n",
"        part_pid.append(tmp_pid)\n",
"\n",
"    bag = zip(j.part_charge, j.part_energy, j.part_px, j.part_py, j.part_pz, np.log(j.part_energy),\n",
"              np.log(j.part_pt), j.part_deta, j.part_dphi, part_logptrel, part_logerel, part_deltaR,\n",
"              np.tanh(j.part_d0val), np.tanh(j.part_dzval), part_type, part_pid)\n",
"\n",
"    # Loop over all particles of this jet, store each one as a ParticleBase and append\n",
"    # it to `particles`, so `particles` holds the information of every particle in the jet.\n",
"    particles = []\n",
"    for c, en, px, py, pz, lEn, lPt, eta, phi, logptrel, logerel, delta_r, d0, dz, ptype, pid in bag:\n",
"        particles.append(ParticleBase(\n",
"            part_charge=c,\n",
"            part_energy=en,\n",
"            part_px=px,\n",
"            part_py=py,\n",
"            part_pz=pz,\n",
"            log_energy=lEn,\n",
"            log_pt=lPt,\n",
"            part_deta=eta,\n",
"            part_dphi=phi,\n",
"            part_logptrel=logptrel,\n",
"            part_logerel=logerel,\n",
"            part_deltaR=delta_r,\n",
"            part_d0=d0,\n",
"            part_dz=dz,\n",
"            particle_type=ptype,  # category name returned by jud_type\n",
"            particle_pid=pid  # category code returned by jud_type\n",
"        ))\n",
"\n",
"    # Build the jet record for this row.\n",
"    jet = Jet(\n",
"        jet_b=j.label_b,  # label_b column: set when this jet is b, unset otherwise\n",
"        jet_bbar=j.label_bbar,  # label_bbar column: set when this jet is bbar\n",
"        jet_energy=j.jet_energy,  # energy of this jet\n",
"        jet_pt=j.jet_pt,  # pt of this jet\n",
"        jet_eta=j.jet_eta,  # eta of this jet (a way of expressing an angle)\n",
"        particles=particles  # all particles in this jet\n",
"    )\n",
"    jet_list.append(jet)\n",
"\n",
"jet_set1 = JetSet(jets=jet_list)\n",
|
||
"\n",
|
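"# As noted above, each jet contains many particles; what is finally fed to the model\n",
"# is the following information for every particle in each jet:\n",
"# log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0 part_dz\n",
"# part_deta part_dphi particle_pid"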
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "c9995622",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "b94329b2",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"False"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Display the bbar label stored on the first jet (taken from the label_bbar column).\n",
"jet_set1.jets[0].jet_bbar"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "4242ce6c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"1.0\n",
|
||
"-1.0\n",
|
||
"1.0\n",
|
||
"0.0\n",
|
||
"-1.0\n",
|
||
"1.0\n",
|
||
"-1.0\n",
|
||
"0.0\n",
|
||
"-1.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"1.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"0.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Print the charge of every particle in the first jet, one per line.\n",
"for particle in jet_set1.jets[0].particles:\n",
"    print(particle.part_charge)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "6f0dc08e",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"-1.0\n",
|
||
"1.0\n",
|
||
"-1.0\n",
|
||
"-1.0\n",
|
||
"0.0\n",
|
||
"1.0\n",
|
||
"-1.0\n",
|
||
"1.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"0.0\n",
|
||
"-1.0\n",
|
||
"0.0\n",
|
||
"0.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Print the charge of every particle in the second jet, one per line.\n",
"for particle in jet_set1.jets[1].particles:\n",
"    print(particle.part_charge)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "e6809f05",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"10000\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Number of jets in the set (JetSet.__len__ returns len(self.jets)).\n",
"print(len(jet_set1))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "67cd5f19",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 与particle-transformer对比的实验设计\n",
|
||
"\n",
|
"使用以下属性与 particle-transformer 进行准确率对比;样本量:bb 100 万(1,000,000),bbbar 100 万(1,000,000)。\n",
|
||
"\n",
|
||
"log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0 part_dz part_deta part_dphi particle_pid"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.14"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|