# %% Imports
from typing import List

import attrs
import numpy as np
import uproot

# %% Configuration
# ROOT inputs and the name of the TTree that is read from each of them.
SCHEMA_PREVIEW_FILE = "./fast.root"
BB_SAMPLE_FILE = "./data/data_fast/fast_bb.root"
TREE_NAME = "tree"

# Order in which the particle-ID flags are handed to jud_type(); the position
# of a name in this tuple is also the integer pid returned for it.
PARTICLE_TYPES = (
    'NeutralHadron',
    'Photon',
    'Electron',
    'Muon',
    'Pion',
    'ChargedKaon',
    'Proton',
)
# Fallback label for a particle with no flag set. pid 7 follows the
# "0,1,2,3,4,5,6,7" range documented on ParticleBase.particle_pid.
# NOTE(review): confirm the downstream model reserves pid 7 for "others".
OTHER_PARTICLE_TYPE = 'Others'
OTHER_PARTICLE_PID = len(PARTICLE_TYPES)

# %% Peek at the branches available in the tree
with uproot.open(SCHEMA_PREVIEW_FILE) as f:
    tree = f[TREE_NAME]
    a = tree.arrays(library="pd")

print(a.keys())


# %% Data model
@attrs.define
class ParticleBase:
    """Per-particle record stored inside a Jet.

    The derived fields are filled by the jet-building loop of this notebook;
    the comments below describe the expressions used there.
    """

    part_charge: float  # charge of the particle (read as floats: 1.0, -1.0, 0.0)
    part_energy: float  # energy of the particle
    part_px: float  # x-component of the momentum vector
    part_py: float  # y-component of the momentum vector
    part_pz: float  # z-component of the momentum vector
    log_energy: float  # np.log(part_energy): natural log, not log10
    log_pt: float  # np.log(part_pt): natural log, not log10
    part_deta: float  # part_deta branch (presumably eta offset from the jet axis, TODO confirm)
    part_dphi: float  # part_dphi branch (presumably phi offset from the jet axis, TODO confirm)
    part_logptrel: float  # np.log(pt(particle) / pt(jet))
    part_logerel: float  # np.log(energy(particle) / energy(jet))
    part_deltaR: float  # np.hypot(part_deta, part_dphi)
    part_d0: float  # tanh(part_d0val)
    part_dz: float  # tanh(part_dzval)
    particle_type: str  # type name returned by jud_type() (e.g. 'Pion', 'Photon')
    particle_pid: int  # integer id returned by jud_type() (0..7)


@attrs.define
class Jet:
    """One jet: its truth labels, kinematics and constituent particles."""

    jet_b: float  # value of the label_b branch (set when the jet is a b jet)
    jet_bbar: float  # value of the label_bbar branch (set when the jet is a bbar jet)
    jet_energy: float  # energy of the jet
    jet_pt: float  # transverse momentum of the jet
    jet_eta: float  # pseudorapidity of the jet
    particles: List[ParticleBase] = attrs.field(factory=list)  # particles in the jet

    def __len__(self):
        """Return the number of particles in the jet."""
        return len(self.particles)


@attrs.define
class JetSet:
    """A collection of jets."""

    jets: List[Jet]

    def __len__(self):
        """Return the number of jets in the set."""
        return len(self.jets)


def jud_type(jtmp):
    """Determine a particle's type from its particle-ID flags.

    Args:
        jtmp: sequence of flags ordered as PARTICLE_TYPES (NeutralHadron,
            Photon, Electron, Muon, Pion, ChargedKaon, Proton).

    Returns:
        ``(type_name, pid)``. The entry with the largest flag wins and ties go
        to the earliest position. When no flag is set at all the particle is
        reported as ``('Others', 7)`` instead of being silently labelled
        ``('NeutralHadron', 0)``.

    Raises:
        ValueError: if ``jtmp`` is empty.
        IndexError: if the winning flag lies beyond the known particle types.
    """
    flags = list(jtmp)
    if not flags:
        raise ValueError("jud_type() needs at least one particle flag")
    if not any(flags):
        # max() of all-zero flags would pick index 0 and mislabel the particle.
        return OTHER_PARTICLE_TYPE, OTHER_PARTICLE_PID
    idx = flags.index(max(flags))
    return PARTICLE_TYPES[idx], idx


# %% Load the bb sample
with uproot.open(BB_SAMPLE_FILE) as f:
    tree = f[TREE_NAME]
    a = tree.arrays(library="pd")
# `a` holds many jets (one per row); the goal is to decide for each jet
# whether it is a b jet or a bbar jet.
# %% Build one Jet per row of `a`
# Every jet contains many particles. The columns whose names start with
# "part_" hold one list per jet: for example part_pt stores the pt of every
# particle in the jet and part_energy stores the energy of every particle.

# Particle-ID flag columns, in the order expected by jud_type().
_PARTICLE_FLAG_COLUMNS = (
    'part_isNeutralHadron',
    'part_isPhoton',
    'part_isElectron',
    'part_isMuon',
    'part_isPion',
    'part_isChargedKaon',
    'part_isProton',
)
# Remaining per-particle columns that are read while building a jet.
_PARTICLE_VALUE_COLUMNS = (
    'part_charge',
    'part_energy',
    'part_px',
    'part_py',
    'part_pz',
    'part_pt',
    'part_deta',
    'part_dphi',
    'part_d0val',
    'part_dzval',
)


def _check_particle_columns(row):
    """Assert that every per-particle column of ``row`` has the same length."""
    lengths = {
        name: len(getattr(row, name))
        for name in _PARTICLE_VALUE_COLUMNS + _PARTICLE_FLAG_COLUMNS
    }
    # zip() below would silently drop particles if any column were shorter.
    assert len(set(lengths.values())) == 1, (
        f"per-particle columns differ in length: {lengths}"
    )


def _classify_particles(row):
    """Return the list of (type name, pid) pairs for the particles of ``row``."""
    flag_columns = [getattr(row, name) for name in _PARTICLE_FLAG_COLUMNS]
    return [jud_type(list(flags)) for flags in zip(*flag_columns)]


def build_particles(row):
    """Turn the per-particle columns of one jet row into ParticleBase objects.

    Args:
        row: one tuple produced by ``a.itertuples()``.

    Returns:
        A list with one ParticleBase per particle of the jet.

    Raises:
        AssertionError: if the per-particle columns differ in length.
    """
    _check_particle_columns(row)

    part_pt = np.array(row.part_pt)
    part_energy = np.array(row.part_energy)

    # np.log is the natural logarithm.
    log_pt = np.log(part_pt)
    log_energy = np.log(part_energy)
    log_pt_rel = np.log(np.divide(part_pt, np.array(row.jet_pt)))
    log_e_rel = np.log(np.divide(part_energy, np.array(row.jet_energy)))

    delta_r = np.hypot(np.array(row.part_deta), np.array(row.part_dphi))
    tanh_d0 = np.tanh(row.part_d0val)
    tanh_dz = np.tanh(row.part_dzval)

    labels = _classify_particles(row)
    type_names = [name for name, _ in labels]
    pids = [pid for _, pid in labels]

    # Walk over all particles of this jet and store each one as a ParticleBase.
    columns = zip(
        row.part_charge, row.part_energy, row.part_px, row.part_py, row.part_pz,
        log_energy, log_pt, row.part_deta, row.part_dphi,
        log_pt_rel, log_e_rel, delta_r, tanh_d0, tanh_dz, type_names, pids,
    )
    return [
        ParticleBase(
            part_charge=charge,
            part_energy=energy,
            part_px=px,
            part_py=py,
            part_pz=pz,
            log_energy=log_e,
            log_pt=log_p,
            part_deta=deta,
            part_dphi=dphi,
            part_logptrel=pt_rel,
            part_logerel=e_rel,
            part_deltaR=d_r,
            part_d0=d0,
            part_dz=dz,
            particle_type=type_name,
            particle_pid=pid,
        )
        for (charge, energy, px, py, pz, log_e, log_p, deta, dphi,
             pt_rel, e_rel, d_r, d0, dz, type_name, pid) in columns
    ]


def build_jet(row):
    """Build the Jet (labels, kinematics, particles) for one row of the frame."""
    return Jet(
        jet_b=row.label_b,  # label_b = 1 if this jet is a b jet, otherwise 0
        jet_bbar=row.label_bbar,  # label_bbar = 1 if this jet is a bbar jet
        jet_energy=row.jet_energy,  # energy of this jet
        jet_pt=row.jet_pt,  # pt of this jet
        jet_eta=row.jet_eta,  # eta of this jet (an angular coordinate)
        particles=build_particles(row),  # all particles of this jet
    )


jet_list = [build_jet(row) for row in a.itertuples()]
jet_set1 = JetSet(jets=jet_list)

# As described above, every jet has many particles. What is finally fed to the
# model is the following information for all particles of each jet:
# log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0
# part_dz part_deta part_dphi particle_pid

# %% Inspect the result
jet_set1.jets[0].jet_bbar


# %%
def print_charges(jet):
    """Print the charge of every particle in ``jet``, one per line."""
    for particle in jet.particles:
        print(particle.part_charge)


# %%
print_charges(jet_set1.jets[0])

# %%
print_charges(jet_set1.jets[1])

# %%
print(len(jet_set1))

# %% [markdown]
# ## Experiment design for the comparison with particle-transformer
#
# Use the attributes below to compare accuracy against particle-transformer.
# Sample sizes noted in the original cell: bb "100w", bbbar "100w"
# (presumably 1,000,000 jets each, TODO confirm).
#
# log_energy log_pt part_logerel part_logptrel part_deltaR part_charge part_d0
# part_dz part_deta part_dphi particle_pid