{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-tcr-pmhc-download",
   "metadata": {},
   "source": [
    "# Download TCR-pMHC structures listed in `TCR-pHMC4.xlsx`\n",
    "\n",
    "This notebook reads the `pdb_id` column of the spreadsheet, downloads every listed structure with PyMOL's `cmd.fetch` into `data/`, and packs that directory into `data.tar.gz`.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel → Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac25f0ce-5583-4663-864c-65abd7eb0886",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from pymol import cmd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-paths-and-formats",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "EXCEL_PATH = Path(\"TCR-pHMC4.xlsx\")   # spreadsheet with a `pdb_id` column\n",
    "DATA_DIR = Path(\"data\")               # downloaded structures are written here\n",
    "ARCHIVE_NAME = \"data\"                 # shutil appends \".tar.gz\"\n",
    "\n",
    "# Formats passed to cmd.fetch(type=...), in order of preference.\n",
    "STRUCTURE_FORMATS = (\"pdb\", \"cif\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load-ids",
   "metadata": {},
   "source": [
    "## Load the PDB identifiers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a6c2235-fdbf-4f7f-bfb8-7b9795d3275c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df = pd.read_excel(EXCEL_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36e1d2e3-d63e-482a-93ae-71587ccecfbe",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Number of rows in the `pdb_id` column (duplicates and empty cells included).\n",
    "len(df[\"pdb_id\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bf7825ab-2691-4cfd-9237-0c7c59a15426",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Number of distinct values (a missing value, if present, counts as one).\n",
    "len(df[\"pdb_id\"].drop_duplicates())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0a2e052-e54e-43b0-8d12-fddc94f69e99",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Unique identifiers in first-seen order. Only strings are kept, which drops\n",
    "# missing values and any cell that was not read as text.\n",
    "# Built from `df` in a single step so that re-running this cell is idempotent.\n",
    "pdb_id = [\n",
    "    code\n",
    "    for code in df[\"pdb_id\"].drop_duplicates()\n",
    "    if isinstance(code, str)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdd7b621-6005-4494-9cc5-aaaaeca087c8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Show the count and a short preview instead of dumping the whole list.\n",
    "len(pdb_id), pdb_id[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-download",
   "metadata": {},
   "source": [
    "## Download the structures\n",
    "\n",
    "Files are written directly into `data/` through the `path` argument of `cmd.fetch`, so no shell `mv` step is needed.\n",
    "\n",
    "**Note from an earlier run:** fetching with `type=\"pdb\"` only printed `Error-fetch: unable to load` for `5ksb`, `7ndq` and `6r0e`, and the archive produced by that run contained `7ndq.cif` and `6r0e.cif`. For that reason mmCIF is tried as a fallback whenever the PDB-format download does not produce a file, and identifiers that still fail are collected in `failed`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9fcaf92-957f-43bf-b3ab-374d5aa9a531",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "DATA_DIR.mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fetch-structure-def",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def fetch_structure(code, formats=STRUCTURE_FORMATS, out_dir=DATA_DIR):\n",
    "    \"\"\"Download one structure into ``out_dir`` and return the path of the saved file.\n",
    "\n",
    "    Each format in ``formats`` is tried in order and the first one whose file\n",
    "    ends up on disk wins. A file that is already present is reused, so\n",
    "    re-running the notebook does not download it again.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    code : str\n",
    "        PDB identifier, e.g. \"1ao7\".\n",
    "    formats : sequence of str\n",
    "        Values passed to ``cmd.fetch(type=...)``, in order of preference.\n",
    "    out_dir : pathlib.Path\n",
    "        Directory the files are written to.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pathlib.Path or None\n",
    "        Path of the structure file, or ``None`` if no format could be fetched.\n",
    "    \"\"\"\n",
    "    for fmt in formats:\n",
    "        target = out_dir / f\"{code}.{fmt}\"\n",
    "        if not target.exists():\n",
    "            # Success is judged by the file on disk, not by what cmd.fetch\n",
    "            # prints or returns.\n",
    "            cmd.fetch(code, type=fmt, path=str(out_dir))\n",
    "        if target.exists():\n",
    "            return target\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76562368-942c-47ec-ba62-8fdd991fba36",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "fetched = {code: fetch_structure(code) for code in pdb_id}\n",
    "\n",
    "# Identifiers for which no file could be obtained in any format.\n",
    "failed = [code for code, path in fetched.items() if path is None]\n",
    "failed"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-archive",
   "metadata": {},
   "source": [
    "## Pack the downloaded files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c62b4e2b-9d2b-4d29-8382-eaa532f60e2e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Equivalent of `tar zcf data.tar.gz data/`, without the per-file listing.\n",
    "archive_path = shutil.make_archive(\n",
    "    ARCHIVE_NAME, \"gztar\", root_dir=\".\", base_dir=str(DATA_DIR)\n",
    ")\n",
    "archive_path"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PDBfixer Environment",
   "language": "python",
   "name": "pdbfixer"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}