# %%
# Download every structure listed in the `pdb_id` column of TCR-pHMC4.xlsx
# into data/ and pack that directory into data.tar.gz.
#
# Provenance: last executed with a Python 3.11.7 kernel named
# "PDBfixer Environment" (kernelspec name `pdbfixer`).
import tarfile
from pathlib import Path

import pandas as pd
from pymol import CmdException, cmd

# %%
# Configuration: everything a reader might want to tune.
SHEET_PATH = Path("TCR-pHMC4.xlsx")
DATA_DIR = Path("data")
ARCHIVE_PATH = Path("data.tar.gz")

# Formats tried, in order, for each entry. A previous run with type="pdb" only
# printed "Error-fetch: unable to load" for 5ksb, 7ndq and 6r0e, and the
# shipped archive carried 7ndq.cif / 6r0e.cif, so mmCIF is the fallback.
FETCH_FORMATS = ("pdb", "cif")

# %%
# ## Load the spreadsheet
df = pd.read_excel(SHEET_PATH)

# %%
# Number of rows in the sheet.
len(df["pdb_id"])

# %%
# Number of distinct values (a missing value counts as one of them).
len(df["pdb_id"].drop_duplicates())

# %%
# ## Build the list of IDs
# Keep the first occurrence of each value in sheet order, and keep only real
# strings. Non-string values (presumably blank cells read as NaN) are dropped.
pdb_id = [
    code
    for code in df["pdb_id"].drop_duplicates().to_list()
    if isinstance(code, str)
]

# %%
# Preview instead of dumping the whole list into the notebook output.
len(pdb_id), pdb_id[:5]


# %%
# ## Download
def fetch_structure(
    code: str,
    dest: Path = DATA_DIR,
    formats: tuple[str, ...] = FETCH_FORMATS,
) -> Path | None:
    """Fetch one entry into ``dest``, trying each format in ``formats`` in turn.

    Args:
        code: Entry ID as it appears in the spreadsheet.
        dest: Directory PyMOL should write the downloaded file to.
        formats: File types passed to ``cmd.fetch(type=...)``, in priority order.

    Returns:
        Path of the file found on disk after a fetch, or ``None`` when no
        format produced a file.
    """
    for fmt in formats:
        # Assumes PyMOL names the file "<code>.<type>", as in the earlier
        # archive listing (e.g. data/1ao7.pdb) -- TODO confirm for mixed-case IDs.
        target = Path(dest) / f"{code}.{fmt}"
        try:
            cmd.fetch(code, type=fmt, path=str(dest))
        except CmdException:
            # NOTE(review): the recorded run only printed an error and carried
            # on; this guard is for PyMOL builds that raise instead -- confirm.
            continue
        # Success is judged by the file on disk, not by fetch's return value.
        if target.exists():
            return target
    return None


# %%
DATA_DIR.mkdir(parents=True, exist_ok=True)
fetched = {code: fetch_structure(code) for code in pdb_id}

# %%
# IDs that could not be obtained in any format; should be empty.
missing = [code for code, path in fetched.items() if path is None]
missing

# %%
# ## Archive
# Equivalent of `tar zcf data.tar.gz data/`, without needing a shell.
with tarfile.open(ARCHIVE_PATH, "w:gz") as archive:
    archive.add(DATA_DIR, arcname=DATA_DIR.name)

# %%
# Number of structure files that went into the archive.
sum(1 for entry in DATA_DIR.iterdir() if entry.is_file())