{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5782da1e", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "id": "73a1eb00", "metadata": {}, "outputs": [], "source": [ "# Load the tab-separated pairs table. header=None stops pandas from using\n", "# the first row as a header, so the four columns are labelled 0..3.\n", "df = pd.read_csv(\n", "    'pairs.txt',\n", "    sep=\"\\t\",\n", "    header=None,\n", ")" ] }, { "cell_type": "code", "execution_count": 5, "id": "5fe3f547", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0chr12499chr12723
1chr13495chr13795
2chr13519chr13735
3chr13641chr172429988
4chr14866chr15022
...............
4587308chrZ82309380chrZ82036721
4587309chrZ82309559chrZ82293151
4587310chrZ82309416chrZ82309572
4587311chrZ82309516chrZ82309709
4587312chrZ82309887chrZ82090383
\n", "

4587313 rows × 4 columns

\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 chr1 2499 chr1 2723\n", "1 chr1 3495 chr1 3795\n", "2 chr1 3519 chr1 3735\n", "3 chr1 3641 chr1 72429988\n", "4 chr1 4866 chr1 5022\n", "... ... ... ... ...\n", "4587308 chrZ 82309380 chrZ 82036721\n", "4587309 chrZ 82309559 chrZ 82293151\n", "4587310 chrZ 82309416 chrZ 82309572\n", "4587311 chrZ 82309516 chrZ 82309709\n", "4587312 chrZ 82309887 chrZ 82090383\n", "\n", "[4587313 rows x 4 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 7, "id": "20441b6e", "metadata": {}, "outputs": [], "source": [ "# Replace the default integer column labels with short names.\n", "# NOTE(review): c1/p1 and c2/p2 look like (chromosome, position) for the two\n", "# sides of each pair -- confirm against the producer of pairs.txt.\n", "df.columns = ['c1', 'p1', 'c2', 'p2']" ] }, { "cell_type": "code", "execution_count": 8, "id": "c6408627", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1p1c2p2
0chr12499chr12723
1chr13495chr13795
2chr13519chr13735
3chr13641chr172429988
4chr14866chr15022
...............
4587308chrZ82309380chrZ82036721
4587309chrZ82309559chrZ82293151
4587310chrZ82309416chrZ82309572
4587311chrZ82309516chrZ82309709
4587312chrZ82309887chrZ82090383
\n", "

4525219 rows × 4 columns

\n", "
" ], "text/plain": [ " c1 p1 c2 p2\n", "0 chr1 2499 chr1 2723\n", "1 chr1 3495 chr1 3795\n", "2 chr1 3519 chr1 3735\n", "3 chr1 3641 chr1 72429988\n", "4 chr1 4866 chr1 5022\n", "... ... ... ... ...\n", "4587308 chrZ 82309380 chrZ 82036721\n", "4587309 chrZ 82309559 chrZ 82293151\n", "4587310 chrZ 82309416 chrZ 82309572\n", "4587311 chrZ 82309516 chrZ 82309709\n", "4587312 chrZ 82309887 chrZ 82090383\n", "\n", "[4525219 rows x 4 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the rows whose c1 value equals their c2 value.\n", "df.query('c1==c2')" ] }, { "cell_type": "code", "execution_count": 9, "id": "dc0d3cfd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1p1c2p2
0chr12499chr12723
1chr13495chr13795
2chr13519chr13735
3chr13641chr172429988
4chr14866chr15022
...............
4587285chrZ82307023chrZ82307302
4587297chrZ82308869chrZ82309117
4587298chrZ82309036chrZ82309308
4587310chrZ82309416chrZ82309572
4587311chrZ82309516chrZ82309709
\n", "

4356414 rows × 4 columns

\n", "
" ], "text/plain": [ " c1 p1 c2 p2\n", "0 chr1 2499 chr1 2723\n", "1 chr1 3495 chr1 3795\n", "2 chr1 3519 chr1 3735\n", "3 chr1 3641 chr1 72429988\n", "4 chr1 4866 chr1 5022\n", "... ... ... ... ...\n", "4587285 chrZ 82307023 chrZ 82307302\n", "4587297 chrZ 82308869 chrZ 82309117\n", "4587298 chrZ 82309036 chrZ 82309308\n", "4587310 chrZ 82309416 chrZ 82309572\n", "4587311 chrZ 82309516 chrZ 82309709\n", "\n", "[4356414 rows x 4 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.query('c1==c2 & p1\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1p1c2p2
11chr120091chr19288
12chr128807chr117707
13chr128819chr117703
14chr128842chr117796
15chr128851chr117818
...............
4587306chrZ82309120chrZ82292613
4587307chrZ82309669chrZ82020370
4587308chrZ82309380chrZ82036721
4587309chrZ82309559chrZ82293151
4587312chrZ82309887chrZ82090383
\n", "

168638 rows × 4 columns

\n", "" ], "text/plain": [ " c1 p1 c2 p2\n", "11 chr1 20091 chr1 9288\n", "12 chr1 28807 chr1 17707\n", "13 chr1 28819 chr1 17703\n", "14 chr1 28842 chr1 17796\n", "15 chr1 28851 chr1 17818\n", "... ... ... ... ...\n", "4587306 chrZ 82309120 chrZ 82292613\n", "4587307 chrZ 82309669 chrZ 82020370\n", "4587308 chrZ 82309380 chrZ 82036721\n", "4587309 chrZ 82309559 chrZ 82293151\n", "4587312 chrZ 82309887 chrZ 82090383\n", "\n", "[168638 rows x 4 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows where c1 equals c2 and p1 is strictly greater than p2.\n", "df.query('c1==c2 & p1>p2')" ] }, { "cell_type": "code", "execution_count": null, "id": "a5194909", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }