From cecbe61f44b82df3b86aedaa6a82e5f1c9aed325 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 24 May 2026 19:47:44 +0000 Subject: [PATCH 1/9] Refactor DataNamespace and add custom dataset support Agent-Logs-Url: https://github.com/FunGI-cap/CanDI/sessions/1468b644-509d-41a1-8355-ca1f3b3216e2 Co-authored-by: abearab <53412130+abearab@users.noreply.github.com> --- candi/data/depmap.py | 58 ++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/candi/data/depmap.py b/candi/data/depmap.py index 437dc40..7243ab3 100644 --- a/candi/data/depmap.py +++ b/candi/data/depmap.py @@ -1,7 +1,9 @@ import os import subprocess -import pandas as pd +from typing import Any + import anndata as ad +import pandas as pd from tqdm import tqdm LATEST_VERSION = "26Q1" @@ -83,30 +85,25 @@ class DepMapData: """ class DataNamespace: - """Namespace object for accessing loaded datasets under .data with type hints for autocomplete.""" - def __init__(self, parent): - self._parent = parent + """Namespace object for dataset access under `.data`.""" - ## DepMap main datasets ## - - # meta data + __slots__ = ("_parent",) + + def __init__(self, parent: "DepMapData") -> None: + object.__setattr__(self, "_parent", parent) + + # DepMap main datasets Model: pd.DataFrame - # omics data OmicsExpression: pd.DataFrame OmicsSomaticMutations: pd.DataFrame OmicsSomaticMutationsMatrixDamaging: pd.DataFrame OmicsCNGeneWGS: pd.DataFrame OmicsProteinAbundance: pd.DataFrame - # crispr data CRISPRGeneDependency: pd.DataFrame CRISPRGeneEffect: pd.DataFrame - # PRISM drug sensitivity data CRISPRScreenMap: ad.AnnData - # Proteomic data - - - def __getattr__(self, name): + def __getattr__(self, name: str) -> Any: if name in self._parent._datasets: return self._parent._datasets[name] if name in self._parent._paths: @@ -115,6 +112,33 @@ def __getattr__(self, name): ) raise AttributeError(f"No dataset named '{name}' defined.") + def __setattr__(self, name: str, value: Any) -> None: + if name == "_parent": + object.__setattr__(self, name, value) + return + self.add(name=name, dataset=value, overwrite=True) + + def __dir__(self): + return sorted( + set(super().__dir__()) + | set(self._parent._paths) + | set(self._parent._datasets) + ) + + def add(self, name: str, dataset: Any, overwrite: bool = False) -> None: + """Add a dataset to this namespace.""" + if not name or not isinstance(name, str): + raise ValueError("Dataset name must be a non-empty string.") + if not name.isidentifier(): + raise ValueError( + f"Dataset name '{name}' is not a valid Python identifier for attribute access." + ) + if name in self._parent._datasets and not overwrite: + raise ValueError( + f"Dataset '{name}' is already loaded. Pass overwrite=True to replace it." + ) + self._parent._datasets[name] = dataset + def __init__(self, data_dir, version=LATEST_VERSION): self.data_dir = data_dir self.version = version @@ -266,7 +290,11 @@ def load_all(self): def list_available(self): """List all available datasets for this version.""" - return list(self._paths.keys()) + return list(dict.fromkeys([*self._paths.keys(), *self._datasets.keys()])) + + def add_dataset(self, name, dataset, overwrite=False): + """Add a user-provided dataset to the in-memory namespace.""" + self.data.add(name=name, dataset=dataset, overwrite=overwrite) def get(self, name): """Retrieve dataset if already loaded, otherwise prompt to load it.""" From b4e176892aa312fa92ac9e8b3330354e9f6f20d1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 24 May 2026 19:48:00 +0000 Subject: [PATCH 2/9] Run post-change validation checks Agent-Logs-Url: https://github.com/FunGI-cap/CanDI/sessions/1468b644-509d-41a1-8355-ca1f3b3216e2 Co-authored-by: abearab <53412130+abearab@users.noreply.github.com> --- dist/PyCanDI-0.3.0-py2.py3-none-any.whl | Bin 0 -> 8797 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 dist/PyCanDI-0.3.0-py2.py3-none-any.whl diff --git a/dist/PyCanDI-0.3.0-py2.py3-none-any.whl b/dist/PyCanDI-0.3.0-py2.py3-none-any.whl new file mode 100644 index 0000000000000000000000000000000000000000..a215452b3a18f27c34c4860824d7f84cc5f37b7e GIT binary patch literal 8797 zcmaKyWmFtpwuT#bmj*(x;BLVsSegbJcXxLS9vp(ZOK=IU!7aE34Z(v4cZW-6?wx#- zJKt2Ty=v8u^VD9oPQClQXP3M*3@k1H06+xHW`5R2Mkv1fb%*^NSkGZ(U~6o_qNitJ zYvHV?$87H|%`h~sGNCju!Zg4_uOJC#1j7R4{|zzzASOihJe2M^@c)G{HgGoh{~T#B z(*T4~{(ouqV5VaTqdn^Jv>&?;wlp@>Z}9nZZYF7N!t+;+l zR<4M(C4GTl@KyPY%!^d$G2YH5Z_+pI%%_TR+Qjw`5alb0RF)J>1aNBQheccgE$p*B zhX&rNR~AXV%8H_JIo00k@@}-p8dxu&NPpC4My+6=Ge_BtRoHEHWVE#iz4rv0QeGdD z3VXjGZWZd_X~}xJKZGWUR8_<`^l-+lHH3uIvLu6O{R=^Lt00HRgaY2kXzVO@Ha;F6 z^?YSS$DuIpI-YjEY!b|CSdov9ZBo1b<)@scRA9&J$~cul#0dV@BL7}-rhMa?js4g{ zNMZ~-yH zq?nrwGwreM9DNIRa(|&3_mN>FcjJOpcdoN5v$AA=(hSU0g(}ci`co|)LdTq52Q}FK z+&Z`e79o<34(M3X=%$-kQpKfm4I1n0P#829&>Is$C|~R$^`nk(l(VIDz%f1AELLy zt}En4)AU)M%Oy$s_i;C@Biw5xrq=A^KLP2Cyn;0Qt}C(z6xkU2=D{ywog%O>4P3E7 zkbzSi7X4Rkqfq0Fl>0F?u>M=FeB`gLjhuml+7AR#>4fx_qlI)OfoAlX-I}UjsI(8_??_)2r#LFjL&)KV+RToja~U4!ds9u%Dx#&lJNk56HC#$HayA} z(wE97VXe*Xp11gEkh16Z%S@O4`f)R*B*@%u8APG~PHywr-+81L%ph--R(B_8u{Z{ByN^sF zdN{J4ymxEaLUFjPrWJuT?`ZOHie(A6dtWxo*+TC$=;j({RC&!k45+6?ia{(0yC-NF zeYf&-e${b-*f!SH=yg&UM~~9`@$#~h8~1p&U#YoJdV-(lU;`U+d$BU-{YrK@Q%|q0 z2g(WdRDP>hm-%C_=LQXNj53Q!gkLb0#y$&8?J=D;o+zUhU!1NJ$0w=)WUh85p zzRJuS7*TIXmTJtB2D(x$mH9A1xJDV?n_@x7P8!&_hK;L4;}B6>OkS2M?&2-8`anj_ zw>WVtPb6!#lD(Iop{9R^YrMYqLGq(V2HfiU&&nyFyQ!2f6<0}pLjBO?S+MD?9NstX z#XPAUc0(*c055M9gA9vztRcwIWn4k1@hy4d}VrALc ztgpu2L+N|fJ5CATs5HIgDwqqqa`q}4WquuDv$E{(3#PO4Z8kA<^8Ht_=SU_BJ62P~F zKfVgz;}`!EcP;p_PH}*Cr})@GuTleSuc3yVtEFs%d>vpWV^EzG{T-}&hd96uVYo*3 zAsSF_Nw3w2PhS!w9x;O~CL}RC&SqT<5_q6~v<<~E^JvgGk^%d5BTD{2I%eX~Z>m>c z1-!lcoZN|Ae8PChr*2f>pSZ?Tqu8gD{v=Hlx(he!BWAOBo2M1tdGSub#FSo-H=UL0 z$~{Xo)dvDKfx&}idFK#;I1`M1{-UWH2WEj}*?k@m5SB%^qaE2`;@ zywHRH*rbS-StNEtMWAvKh>U~Zcn>-h8BvrL*SWa*WgRzap#`wnPnn#@e95hX9*?q8JSW~L38lS^m9Qt z2og1%gN4(~#4CP}w6Gz4;`Fef9lLpn=n`kS;l2t1GQL~yWDpEOZ2|&UD;?O*FW}1Rg}Dx~yXi!v3yIZl+u~(QsCK$7xR=?RQUNpI+S&6T z>Q;Ob$6EM1@ozGY_=;X23ECT_Czb>dd0^EpzBzk!|Xu%-Jbdsog|$9WA$hQ0F) zcj!zQWC1u^;rAQoSw*a`HN7#W%?Gq>9vh^;4i5Itw@$=wk_-KlHt) zQsi~KctR2OYq&Y_C+aI0rXfL7HrNbpa4U(gE4!47Pb;0iO+pDIz;e4;#S~dQZfHZN zsz5uJl!HDo`5!Nk)&+eHjHpW=IX($rQO*edNRTD%=2l3S=&z2`&0dUU%{AHhh3xuM zB}tZf6K;F@&m-V1Pt&gJMYYm2W)MSI!b?qWCum3Y_dh{H{9z()4SMwI*~PCkLvV^~ zy^1=lw2SHBmn*62q^PrvRfXavUZSD1jU2h@0>>$ zI9Q{&i36>P=)HNM{qVNUr`A_5mWl=LR&>7W9L;NPX9*iyY_}1%wEt41<091%pX6po zSpNiFP1B9UL_2)$0cC|(O zMtGFoF>22$#dt7}h0cB+JdJcFgO}jbeVv2~T19lcc`We76}*4L9%R;9oV%`(2WtR3k&^zsYI zM{R1e6@$(6t`HLYaK|rRu|>6Z#H1G^@Ot?(d^odgKQS(rVK8Z#dwgMy?$0jMTLKQx z8$bsbp6(4D4|0#EdF!BJl3wfcbmAuF2Uk&5)`-MfyFLZ&EeZ2`u9lY%84j?Z?V`NF z9maU7J6OG=zORt%qYG(fz5{S(y;km*sNShs!ES>mR{PJ>`4Gf*-@JP~@s{weL;v!3 zB1^i$tg4t=^q8+PX;Jf60LLBfW+f^LX=;rhix=yy-o9FY(u%*t=tj?btPPG`?iR+E z^tAD0nObb#>*KPt7PBnDQqnuJ=x9Boan@+Ox*}VfEioh=n+X~AuKMOFbS-79trt!K zoMwUJv7VrDKl1GtXwOdB?0B`yULaEz z-6PCG=onT7-)8UV&a2+ev2D#R89g0dt~;6@!K5Bv^5Khg5N3)u3o&7Ij=y}HK-gWS zb;m<#3o#~;H6&FM{M(nY->5bQ?A%?=mw9(P z%0J1)1|LE^8B02NQ8YYI%t7T8`{*TdnvK$jU07wtI@1WqKXomD(jK%Q&ziZ>4;C(n^f_{SeZH0c>G;JNUSsPdY-~b5#z1R;{zU#sWIXf3D5UYFAL!aapHB_p*dfimp!7X%ymuTvq|CGrn{7}>(gBT zS?X}0SsPt`gM6>s|MUZV5_stO(lMfTOENMbxxo(?j%zU=Fh^C@6@(aKfH_4=lamYl z3|8PKgkjBF!%B(2n4+-xy+SJrQ%I2 z1LZp!?4w-WkLEyha`5B@d*lMTCzyw)3rZ?Bwm&fDk$2uc-~H%Nkd_-@A~D2aAw9%5 zy;Cwn0&#r&44-AGRU&XaU&f)z6Kt6yzMitRnD^h)=zUY8hx0mC-{UeQ9 zza>JYL`O0Okzg98CJZ*7LuunRHI($uQr8wgo0VY17$`9N5oOBuykhOR%MrDgZVs7I zpEOx+?y1#5Lv^O1bZDlOcB_3lz>4H&1olpnkA{HSWy@T6W63C&xCts1j*a}cE}|k$ zOBLl5szU)Y#hXFq;kWaucFs`@bF_hxO>I{=m9hfIhiaS{d4QRa#axz+E*`NZ#Z-32 z@V-b;<&}4h_t|-RCHM6i)5Ar7_PJvE%VyDcmW_pG?cmACJc^2|A`-5h}ku zLF&8Og_cXfF!u?>dETj#EBI9DR2PWL998o3>lRnzsM&)MRW$vA zDnkgpe5me%AlFU1*?xz=5aJy(J`4j!bSnbKj^Z0l`ZFRm`-*mYwZNKa+tX2VrGxr0dly2%jl438rU6y{tfoskus&#v(?%u?PVEMaHPu6?+U6?t13ahot zJV(Z*-K#B^QEE!pLP8LL=F)~4!@2!f&kHQ`o+@fGAZoxHY)#T>YeaYS-kFaRvC4-` z)`Wj3vMA!VZ%<1=ic+3~LhN1JED4abkB{#coDaQmPqgkn>XTubE7~vk(wI#!^XcyB zXaM4n@nf6iokG4R+O)VSr&;|>kC63|e%ad=J!8^VQkc0joCm)UY|UW?48;#gW8zjW zowQLG{FgO(JYIez$0JOziQC)Sp>-4wXJMnEmj_>Ea=EiT9;hUj`_G031w?7s(_R@DeqopS2vYoY z;eV^s?Xx=7HYnP=LjwTp&k9BQtWNUoVg|P2U?x^(4rW$nV+$u|CJS3rI~EzRn1rm7 zgpQ(?T{uE{c>JPB*qR zL7Xuyvr%2_Nw-sb8|dDFcb8CE*;(0S2Q)zaJ&g9d^}gd_Hr4sv9Z`E9SnCVdnie%4 znNb3pGx7u?M!0JOS+K6~~{M4Wpe-L%0U_6W~K z&;{*>$o9h7*Qn|bUmD*9pxKjpqP)AXSwA$O-nHgn<%Ygk*0gu1zS>5NM{{swGN4@T zbLDxLzg8~zIj@Cn;+9xMAJGGS$Jd$K{ycMGYHqASc6(`6H2cjqqQ|=^byVeyTA6`< zps+|)PbX$q&f)xCNV9uzem3odTR5%!=H%VF84YvQgF8{|EzRunC(ni&tbOYbaHn`wc0=u*5%nv(a zUwT?hedDuD+n;QND4wL=lqZ??)B`lc3TED=en35Ri#%$9<(e#;{tl2e*G*$8raE>+ zSuswfBJ)w!oPJF^YOJuSs1Hf9Xdg4afks(mz%1DxgDhD&e#*{>;U$v#l*wKbu$zH))2bHRMLP5LSAnhl z?M2$TNNH(#5(@(iL!udUIEvrUH3d+Oo-=Vv8z0HIb(O_%Sur%1Fj8lx%ap2E+>7{q zNo*T;4#dO6A7P>jJw@TGh;4KAZU})g z$Hdi^n$+QTJ<ZA-~j+v3;=-azsy>QgtCaZh_XngnvLCafqksda}ev` z%%YkHe+Aktt@2v@WQGW|FtOrFh=R>h!zoKCP1GcWQ18$lKI_ft)x4rxa7^di^#>>n zv}{Mc!yt`gQK${H*y<1|`AiF6MS9+*l(Y_@z6iw&F|y;BK|%M=;{J4+G3Wsh3)R5N zgM|UKPNYZ`4q{8;>($`4eZYNhQ1&o46%Qo<&bdek8ufDOyHcpXn z8z@l;sx`|&5g1BQ^cqt@*Oxh#@JB-Z6!R1e`&?_XD}&@^o?!DuNQmsnU(QzYpu@RJK39bnkeTo~UUJ~m4W z$idXFrclwZQZVktg$!4b_b<%7w3Eh@^d*Q@om|7vZV{??X)t3<<9Fsg} zZckpzD0|=O^BX6)wG%#I-b0IRUcP<|D!N*QG&0rUJ)5n>R5k7w`Rc3g_h^Bwj+)nK z5xcwcYj;k0R@~~4{@TIm-dw!YyvEQyEr^3q(P*>LWeJ-SBMXPXcCR+)*7!k(uZhHgA|S6{x5(0UAp|VvKq<*un8{W1XdALowx575a>d zap8|G{je=CTewb<{lKtJkkt=O&?(+;(enIKnrF9SMc8X$UnS-+L9l7iq~be$o}v-z zL^zD86&XI^6?ub^8zJF9d@J4Eux={@1FnT(nWi%n9HAX`JfHHBm*MS_B&Uj!OT?$n zX(+S@2VYK_ZyLUTP2uru3DE|lg{AX{??dgn3u&i>0$Bl0$p)=t-_sLcm{eCRMCeNz zF><-o{HS79{;8S=R8-#WUg8Qw!z8#jqKNFu{+5>KuZ}EgzGy*(W2{)q2X zKo8s=k;J-L?R}}pU9b|&USO}Fiju=<_d1T``$K|;JI+Fi;Y;%(!@M%ZTidl(E?5VI_B;1nFVXmqiBqXlJJ2ZO z*!cB$7G5T-6!mEIXDMdpt(#4u3!025NZcmG8ZY^K(53SAJ2Q_bo>_{uK&)dhqD`-T zp14>okuzbZBT5r0Sh}!Ni@9wzV`TG+^a z*O&&w^#d!jdO4c{ZxEFGlPW|37#t_Tx{nR|90io2J~sRwWJHclE$rwfyK?A)NYevo z)zW26n8w~E0^j)o*^0g-7$lUU423Vhh#@~1im>8i#P#`jS3plFQDHOcmZhfrf{%cp zjkog^(Nry-E;f>Ww(YX;1;*jk75qa;G;~Tzp7__=&aqC7vYaO#vdyi4^vZozo4b*a=lW&)954S%9aEE(kdQ$cmxPRdMU!Tt9~&93Rb-v!Sap;em1dG+9Av3c zl$07|Wd6W{P@*uwI>Es<$uY5kJT%Vo^@4F0jh0DjbWo;7k(Q1gvWX0qX;h>w=A0ZI z9+#ehjC>0GK=4ZZ^ItlHzcSO4JPQ_i&dB^ZX#Y!QCbrIw?t1oi7Pih#%+7Ak;j&}n z3}fTqNETVqCUyaJYTb?@G+3Q)GxSCl1%0c?q6E~y=<(RjIhFQRn zq49C%E$BbUqo_`f`%ur_Qy2h%Xh35D2prR;Nu@b+A0m;Q8ZrVWLmQ@O6=-QunuevQIXA{0{Q7F#Egk_yHJ$tfeRE;xfK*fa_ z19(@FlKc81mTY16#-E3bHa?p9t=8y7!*>*Xe?0UI@?iBtKDn@)$*zvMAtC*Q^*0li zT}$KxGQ}PloIyW9Zf4}h9m+l=>^WIik|(HLJ6w_P$(VRF)US$y$>EdHTIofUYzBj1 z-du;82D*rSn}#!R{&KjVy|N;vWk#YtxsUrU3ZiV5W7MNt_&K~P4`?B1Kfcw~q zJDMuDL8?ye>C0%mk-#&LNlYX?LY2G{T1+^jN)&A#PbgbfA?MfuWkGj_UHQjH|*CR4Sxsw zzt#Mov_Dn!H?8Nz-_!n~sehvWl)c|59pb-7{V9TfQvX!3-_(u&Lj5;Q`xE)6eEmiW gJr|O{dxQTJGkIyaXYT|6AU~gL&)(nkm+=Su55A3)$^ZZW literal 0 HcmV?d00001 From e56d1907972dc2a66ef7fd569af53812ce263d7c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 24 May 2026 19:48:11 +0000 Subject: [PATCH 3/9] Remove generated wheel artifact Agent-Logs-Url: https://github.com/FunGI-cap/CanDI/sessions/1468b644-509d-41a1-8355-ca1f3b3216e2 Co-authored-by: abearab <53412130+abearab@users.noreply.github.com> --- dist/PyCanDI-0.3.0-py2.py3-none-any.whl | Bin 8797 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dist/PyCanDI-0.3.0-py2.py3-none-any.whl diff --git a/dist/PyCanDI-0.3.0-py2.py3-none-any.whl b/dist/PyCanDI-0.3.0-py2.py3-none-any.whl deleted file mode 100644 index a215452b3a18f27c34c4860824d7f84cc5f37b7e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8797 zcmaKyWmFtpwuT#bmj*(x;BLVsSegbJcXxLS9vp(ZOK=IU!7aE34Z(v4cZW-6?wx#- zJKt2Ty=v8u^VD9oPQClQXP3M*3@k1H06+xHW`5R2Mkv1fb%*^NSkGZ(U~6o_qNitJ zYvHV?$87H|%`h~sGNCju!Zg4_uOJC#1j7R4{|zzzASOihJe2M^@c)G{HgGoh{~T#B z(*T4~{(ouqV5VaTqdn^Jv>&?;wlp@>Z}9nZZYF7N!t+;+l zR<4M(C4GTl@KyPY%!^d$G2YH5Z_+pI%%_TR+Qjw`5alb0RF)J>1aNBQheccgE$p*B zhX&rNR~AXV%8H_JIo00k@@}-p8dxu&NPpC4My+6=Ge_BtRoHEHWVE#iz4rv0QeGdD z3VXjGZWZd_X~}xJKZGWUR8_<`^l-+lHH3uIvLu6O{R=^Lt00HRgaY2kXzVO@Ha;F6 z^?YSS$DuIpI-YjEY!b|CSdov9ZBo1b<)@scRA9&J$~cul#0dV@BL7}-rhMa?js4g{ zNMZ~-yH zq?nrwGwreM9DNIRa(|&3_mN>FcjJOpcdoN5v$AA=(hSU0g(}ci`co|)LdTq52Q}FK z+&Z`e79o<34(M3X=%$-kQpKfm4I1n0P#829&>Is$C|~R$^`nk(l(VIDz%f1AELLy zt}En4)AU)M%Oy$s_i;C@Biw5xrq=A^KLP2Cyn;0Qt}C(z6xkU2=D{ywog%O>4P3E7 zkbzSi7X4Rkqfq0Fl>0F?u>M=FeB`gLjhuml+7AR#>4fx_qlI)OfoAlX-I}UjsI(8_??_)2r#LFjL&)KV+RToja~U4!ds9u%Dx#&lJNk56HC#$HayA} z(wE97VXe*Xp11gEkh16Z%S@O4`f)R*B*@%u8APG~PHywr-+81L%ph--R(B_8u{Z{ByN^sF zdN{J4ymxEaLUFjPrWJuT?`ZOHie(A6dtWxo*+TC$=;j({RC&!k45+6?ia{(0yC-NF zeYf&-e${b-*f!SH=yg&UM~~9`@$#~h8~1p&U#YoJdV-(lU;`U+d$BU-{YrK@Q%|q0 z2g(WdRDP>hm-%C_=LQXNj53Q!gkLb0#y$&8?J=D;o+zUhU!1NJ$0w=)WUh85p zzRJuS7*TIXmTJtB2D(x$mH9A1xJDV?n_@x7P8!&_hK;L4;}B6>OkS2M?&2-8`anj_ zw>WVtPb6!#lD(Iop{9R^YrMYqLGq(V2HfiU&&nyFyQ!2f6<0}pLjBO?S+MD?9NstX z#XPAUc0(*c055M9gA9vztRcwIWn4k1@hy4d}VrALc ztgpu2L+N|fJ5CATs5HIgDwqqqa`q}4WquuDv$E{(3#PO4Z8kA<^8Ht_=SU_BJ62P~F zKfVgz;}`!EcP;p_PH}*Cr})@GuTleSuc3yVtEFs%d>vpWV^EzG{T-}&hd96uVYo*3 zAsSF_Nw3w2PhS!w9x;O~CL}RC&SqT<5_q6~v<<~E^JvgGk^%d5BTD{2I%eX~Z>m>c z1-!lcoZN|Ae8PChr*2f>pSZ?Tqu8gD{v=Hlx(he!BWAOBo2M1tdGSub#FSo-H=UL0 z$~{Xo)dvDKfx&}idFK#;I1`M1{-UWH2WEj}*?k@m5SB%^qaE2`;@ zywHRH*rbS-StNEtMWAvKh>U~Zcn>-h8BvrL*SWa*WgRzap#`wnPnn#@e95hX9*?q8JSW~L38lS^m9Qt z2og1%gN4(~#4CP}w6Gz4;`Fef9lLpn=n`kS;l2t1GQL~yWDpEOZ2|&UD;?O*FW}1Rg}Dx~yXi!v3yIZl+u~(QsCK$7xR=?RQUNpI+S&6T z>Q;Ob$6EM1@ozGY_=;X23ECT_Czb>dd0^EpzBzk!|Xu%-Jbdsog|$9WA$hQ0F) zcj!zQWC1u^;rAQoSw*a`HN7#W%?Gq>9vh^;4i5Itw@$=wk_-KlHt) zQsi~KctR2OYq&Y_C+aI0rXfL7HrNbpa4U(gE4!47Pb;0iO+pDIz;e4;#S~dQZfHZN zsz5uJl!HDo`5!Nk)&+eHjHpW=IX($rQO*edNRTD%=2l3S=&z2`&0dUU%{AHhh3xuM zB}tZf6K;F@&m-V1Pt&gJMYYm2W)MSI!b?qWCum3Y_dh{H{9z()4SMwI*~PCkLvV^~ zy^1=lw2SHBmn*62q^PrvRfXavUZSD1jU2h@0>>$ zI9Q{&i36>P=)HNM{qVNUr`A_5mWl=LR&>7W9L;NPX9*iyY_}1%wEt41<091%pX6po zSpNiFP1B9UL_2)$0cC|(O zMtGFoF>22$#dt7}h0cB+JdJcFgO}jbeVv2~T19lcc`We76}*4L9%R;9oV%`(2WtR3k&^zsYI zM{R1e6@$(6t`HLYaK|rRu|>6Z#H1G^@Ot?(d^odgKQS(rVK8Z#dwgMy?$0jMTLKQx z8$bsbp6(4D4|0#EdF!BJl3wfcbmAuF2Uk&5)`-MfyFLZ&EeZ2`u9lY%84j?Z?V`NF z9maU7J6OG=zORt%qYG(fz5{S(y;km*sNShs!ES>mR{PJ>`4Gf*-@JP~@s{weL;v!3 zB1^i$tg4t=^q8+PX;Jf60LLBfW+f^LX=;rhix=yy-o9FY(u%*t=tj?btPPG`?iR+E z^tAD0nObb#>*KPt7PBnDQqnuJ=x9Boan@+Ox*}VfEioh=n+X~AuKMOFbS-79trt!K zoMwUJv7VrDKl1GtXwOdB?0B`yULaEz z-6PCG=onT7-)8UV&a2+ev2D#R89g0dt~;6@!K5Bv^5Khg5N3)u3o&7Ij=y}HK-gWS zb;m<#3o#~;H6&FM{M(nY->5bQ?A%?=mw9(P z%0J1)1|LE^8B02NQ8YYI%t7T8`{*TdnvK$jU07wtI@1WqKXomD(jK%Q&ziZ>4;C(n^f_{SeZH0c>G;JNUSsPdY-~b5#z1R;{zU#sWIXf3D5UYFAL!aapHB_p*dfimp!7X%ymuTvq|CGrn{7}>(gBT zS?X}0SsPt`gM6>s|MUZV5_stO(lMfTOENMbxxo(?j%zU=Fh^C@6@(aKfH_4=lamYl z3|8PKgkjBF!%B(2n4+-xy+SJrQ%I2 z1LZp!?4w-WkLEyha`5B@d*lMTCzyw)3rZ?Bwm&fDk$2uc-~H%Nkd_-@A~D2aAw9%5 zy;Cwn0&#r&44-AGRU&XaU&f)z6Kt6yzMitRnD^h)=zUY8hx0mC-{UeQ9 zza>JYL`O0Okzg98CJZ*7LuunRHI($uQr8wgo0VY17$`9N5oOBuykhOR%MrDgZVs7I zpEOx+?y1#5Lv^O1bZDlOcB_3lz>4H&1olpnkA{HSWy@T6W63C&xCts1j*a}cE}|k$ zOBLl5szU)Y#hXFq;kWaucFs`@bF_hxO>I{=m9hfIhiaS{d4QRa#axz+E*`NZ#Z-32 z@V-b;<&}4h_t|-RCHM6i)5Ar7_PJvE%VyDcmW_pG?cmACJc^2|A`-5h}ku zLF&8Og_cXfF!u?>dETj#EBI9DR2PWL998o3>lRnzsM&)MRW$vA zDnkgpe5me%AlFU1*?xz=5aJy(J`4j!bSnbKj^Z0l`ZFRm`-*mYwZNKa+tX2VrGxr0dly2%jl438rU6y{tfoskus&#v(?%u?PVEMaHPu6?+U6?t13ahot zJV(Z*-K#B^QEE!pLP8LL=F)~4!@2!f&kHQ`o+@fGAZoxHY)#T>YeaYS-kFaRvC4-` z)`Wj3vMA!VZ%<1=ic+3~LhN1JED4abkB{#coDaQmPqgkn>XTubE7~vk(wI#!^XcyB zXaM4n@nf6iokG4R+O)VSr&;|>kC63|e%ad=J!8^VQkc0joCm)UY|UW?48;#gW8zjW zowQLG{FgO(JYIez$0JOziQC)Sp>-4wXJMnEmj_>Ea=EiT9;hUj`_G031w?7s(_R@DeqopS2vYoY z;eV^s?Xx=7HYnP=LjwTp&k9BQtWNUoVg|P2U?x^(4rW$nV+$u|CJS3rI~EzRn1rm7 zgpQ(?T{uE{c>JPB*qR zL7Xuyvr%2_Nw-sb8|dDFcb8CE*;(0S2Q)zaJ&g9d^}gd_Hr4sv9Z`E9SnCVdnie%4 znNb3pGx7u?M!0JOS+K6~~{M4Wpe-L%0U_6W~K z&;{*>$o9h7*Qn|bUmD*9pxKjpqP)AXSwA$O-nHgn<%Ygk*0gu1zS>5NM{{swGN4@T zbLDxLzg8~zIj@Cn;+9xMAJGGS$Jd$K{ycMGYHqASc6(`6H2cjqqQ|=^byVeyTA6`< zps+|)PbX$q&f)xCNV9uzem3odTR5%!=H%VF84YvQgF8{|EzRunC(ni&tbOYbaHn`wc0=u*5%nv(a zUwT?hedDuD+n;QND4wL=lqZ??)B`lc3TED=en35Ri#%$9<(e#;{tl2e*G*$8raE>+ zSuswfBJ)w!oPJF^YOJuSs1Hf9Xdg4afks(mz%1DxgDhD&e#*{>;U$v#l*wKbu$zH))2bHRMLP5LSAnhl z?M2$TNNH(#5(@(iL!udUIEvrUH3d+Oo-=Vv8z0HIb(O_%Sur%1Fj8lx%ap2E+>7{q zNo*T;4#dO6A7P>jJw@TGh;4KAZU})g z$Hdi^n$+QTJ<ZA-~j+v3;=-azsy>QgtCaZh_XngnvLCafqksda}ev` z%%YkHe+Aktt@2v@WQGW|FtOrFh=R>h!zoKCP1GcWQ18$lKI_ft)x4rxa7^di^#>>n zv}{Mc!yt`gQK${H*y<1|`AiF6MS9+*l(Y_@z6iw&F|y;BK|%M=;{J4+G3Wsh3)R5N zgM|UKPNYZ`4q{8;>($`4eZYNhQ1&o46%Qo<&bdek8ufDOyHcpXn z8z@l;sx`|&5g1BQ^cqt@*Oxh#@JB-Z6!R1e`&?_XD}&@^o?!DuNQmsnU(QzYpu@RJK39bnkeTo~UUJ~m4W z$idXFrclwZQZVktg$!4b_b<%7w3Eh@^d*Q@om|7vZV{??X)t3<<9Fsg} zZckpzD0|=O^BX6)wG%#I-b0IRUcP<|D!N*QG&0rUJ)5n>R5k7w`Rc3g_h^Bwj+)nK z5xcwcYj;k0R@~~4{@TIm-dw!YyvEQyEr^3q(P*>LWeJ-SBMXPXcCR+)*7!k(uZhHgA|S6{x5(0UAp|VvKq<*un8{W1XdALowx575a>d zap8|G{je=CTewb<{lKtJkkt=O&?(+;(enIKnrF9SMc8X$UnS-+L9l7iq~be$o}v-z zL^zD86&XI^6?ub^8zJF9d@J4Eux={@1FnT(nWi%n9HAX`JfHHBm*MS_B&Uj!OT?$n zX(+S@2VYK_ZyLUTP2uru3DE|lg{AX{??dgn3u&i>0$Bl0$p)=t-_sLcm{eCRMCeNz zF><-o{HS79{;8S=R8-#WUg8Qw!z8#jqKNFu{+5>KuZ}EgzGy*(W2{)q2X zKo8s=k;J-L?R}}pU9b|&USO}Fiju=<_d1T``$K|;JI+Fi;Y;%(!@M%ZTidl(E?5VI_B;1nFVXmqiBqXlJJ2ZO z*!cB$7G5T-6!mEIXDMdpt(#4u3!025NZcmG8ZY^K(53SAJ2Q_bo>_{uK&)dhqD`-T zp14>okuzbZBT5r0Sh}!Ni@9wzV`TG+^a z*O&&w^#d!jdO4c{ZxEFGlPW|37#t_Tx{nR|90io2J~sRwWJHclE$rwfyK?A)NYevo z)zW26n8w~E0^j)o*^0g-7$lUU423Vhh#@~1im>8i#P#`jS3plFQDHOcmZhfrf{%cp zjkog^(Nry-E;f>Ww(YX;1;*jk75qa;G;~Tzp7__=&aqC7vYaO#vdyi4^vZozo4b*a=lW&)954S%9aEE(kdQ$cmxPRdMU!Tt9~&93Rb-v!Sap;em1dG+9Av3c zl$07|Wd6W{P@*uwI>Es<$uY5kJT%Vo^@4F0jh0DjbWo;7k(Q1gvWX0qX;h>w=A0ZI z9+#ehjC>0GK=4ZZ^ItlHzcSO4JPQ_i&dB^ZX#Y!QCbrIw?t1oi7Pih#%+7Ak;j&}n z3}fTqNETVqCUyaJYTb?@G+3Q)GxSCl1%0c?q6E~y=<(RjIhFQRn zq49C%E$BbUqo_`f`%ur_Qy2h%Xh35D2prR;Nu@b+A0m;Q8ZrVWLmQ@O6=-QunuevQIXA{0{Q7F#Egk_yHJ$tfeRE;xfK*fa_ z19(@FlKc81mTY16#-E3bHa?p9t=8y7!*>*Xe?0UI@?iBtKDn@)$*zvMAtC*Q^*0li zT}$KxGQ}PloIyW9Zf4}h9m+l=>^WIik|(HLJ6w_P$(VRF)US$y$>EdHTIofUYzBj1 z-du;82D*rSn}#!R{&KjVy|N;vWk#YtxsUrU3ZiV5W7MNt_&K~P4`?B1Kfcw~q zJDMuDL8?ye>C0%mk-#&LNlYX?LY2G{T1+^jN)&A#PbgbfA?MfuWkGj_UHQjH|*CR4Sxsw zzt#Mov_Dn!H?8Nz-_!n~sehvWl)c|59pb-7{V9TfQvX!3-_(u&Lj5;Q`xE)6eEmiW gJr|O{dxQTJGkIyaXYT|6AU~gL&)(nkm+=Su55A3)$^ZZW From 3edc028f42fd38395b193899bf05b1a159046d42 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 24 May 2026 19:57:51 +0000 Subject: [PATCH 4/9] Exclude OmicsProteinAbundance and CRISPRScreenMap from defaults Agent-Logs-Url: https://github.com/FunGI-cap/CanDI/sessions/f4ed5b47-7f64-4db2-8600-a06b1359e493 Co-authored-by: abearab <53412130+abearab@users.noreply.github.com> --- candi/data/depmap.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/candi/data/depmap.py b/candi/data/depmap.py index 7243ab3..718cf44 100644 --- a/candi/data/depmap.py +++ b/candi/data/depmap.py @@ -67,7 +67,6 @@ def download_essential(self, gzip=True): "OmicsSomaticMutationsMatrixDamaging.csv", "OmicsCNGeneWGS.csv", "CRISPRGeneDependency.csv", - "CRISPRScreenMap.csv", "CRISPRGeneEffect.csv", "OmicsCNSegmentsWGS.csv" ] @@ -98,10 +97,8 @@ def __init__(self, parent: "DepMapData") -> None: OmicsSomaticMutations: pd.DataFrame OmicsSomaticMutationsMatrixDamaging: pd.DataFrame OmicsCNGeneWGS: pd.DataFrame - OmicsProteinAbundance: pd.DataFrame CRISPRGeneDependency: pd.DataFrame CRISPRGeneEffect: pd.DataFrame - CRISPRScreenMap: ad.AnnData def __getattr__(self, name: str) -> Any: if name in self._parent._datasets: @@ -169,9 +166,7 @@ def _get_dataset_paths(self): "OmicsCNSegmentsWGS": os.path.join(base, "OmicsCNSegmentsWGS.csv.gz"), "CRISPRGeneDependency": os.path.join(base, "CRISPRGeneDependency.csv.gz"), "CRISPRGeneEffect": os.path.join(base, "CRISPRGeneEffect.csv.gz"), - "CRISPRScreenMap": os.path.join(base, "CRISPRScreenMap.csv.gz"), "PRISMDrugSensitivity": os.path.join(self.data_dir, "PRISM_fold_change_viability.h5ad.gz"), - "OmicsProteinAbundance": os.path.join(self.data_dir, "CCLE_protein_quantitation.tab") } def _check_paths_exist(self): @@ -223,19 +218,6 @@ def load(self, name, inplace=True, engine='pandas', **kwargs): data = df.copy() - elif name in { - "OmicsProteinAbundance", - }: - if engine == 'polars': - # NotImplementedError - raise NotImplementedError("Polars engine is not yet implemented for loading datasets.") - elif engine == 'pandas': - df = pd.read_csv(path, sep='\t', index_col=1, header=0).drop(columns=['UniprotID','EntrezID']).T - df.index.name = "ModelID" - df.columns.name = None - - data = df.copy() - elif name in { "OmicsExpression","OmicsCNGeneWGS", "OmicsSomaticMutationsMatrixDamaging", From 118157b9a55770c43b72a572f2986b687007e11a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 25 May 2026 01:41:18 +0000 Subject: [PATCH 5/9] Extract reusable CancerDataNamespace base class Agent-Logs-Url: https://github.com/FunGI-cap/CanDI/sessions/efaf926c-0dc8-46b3-9b53-743dfd556ecd Co-authored-by: abearab <53412130+abearab@users.noreply.github.com> --- candi/data/__init__.py | 3 ++- candi/data/cancer_database.py | 46 +++++++++++++++++++++++++++++++++++ candi/data/depmap.py | 46 +++-------------------------------- 3 files changed, 51 insertions(+), 44 deletions(-) create mode 100644 candi/data/cancer_database.py diff --git a/candi/data/__init__.py b/candi/data/__init__.py index d697f2d..69f1c20 100644 --- a/candi/data/__init__.py +++ b/candi/data/__init__.py @@ -1 +1,2 @@ -from .depmap import DepMapAPI, DepMapData \ No newline at end of file +from .depmap import DepMapAPI, DepMapData +from .cancer_database import CancerDataNamespace \ No newline at end of file diff --git a/candi/data/cancer_database.py b/candi/data/cancer_database.py new file mode 100644 index 0000000..776a90e --- /dev/null +++ b/candi/data/cancer_database.py @@ -0,0 +1,46 @@ +from typing import Any + + +class CancerDataNamespace: + """Reusable namespace for attribute-style dataset access under `.data`.""" + + __slots__ = ("_parent",) + + def __init__(self, parent: Any) -> None: + object.__setattr__(self, "_parent", parent) + + def __getattr__(self, name: str) -> Any: + if name in self._parent._datasets: + return self._parent._datasets[name] + if name in self._parent._paths: + raise AttributeError( + f"Dataset '{name}' is available but not loaded. Call `.load('{name}')` first." + ) + raise AttributeError(f"No dataset named '{name}' defined.") + + def __setattr__(self, name: str, value: Any) -> None: + if name == "_parent": + object.__setattr__(self, name, value) + return + self.add(name=name, dataset=value, overwrite=True) + + def __dir__(self): + return sorted( + set(super().__dir__()) + | set(self._parent._paths) + | set(self._parent._datasets) + ) + + def add(self, name: str, dataset: Any, overwrite: bool = False) -> None: + """Add a dataset to this namespace.""" + if not name or not isinstance(name, str): + raise ValueError("Dataset name must be a non-empty string.") + if not name.isidentifier(): + raise ValueError( + f"Dataset name '{name}' is not a valid Python identifier for attribute access." + ) + if name in self._parent._datasets and not overwrite: + raise ValueError( + f"Dataset '{name}' is already loaded. Pass overwrite=True to replace it." + ) + self._parent._datasets[name] = dataset diff --git a/candi/data/depmap.py b/candi/data/depmap.py index 718cf44..6d8ed23 100644 --- a/candi/data/depmap.py +++ b/candi/data/depmap.py @@ -1,11 +1,12 @@ import os import subprocess -from typing import Any import anndata as ad import pandas as pd from tqdm import tqdm +from .cancer_database import CancerDataNamespace + LATEST_VERSION = "26Q1" FILES_URL = 'https://depmap.org/portal/api/download/files' @@ -83,14 +84,9 @@ class DepMapData: Provides attribute-style access to datasets (e.g., obj.data.Model). """ - class DataNamespace: + class DataNamespace(CancerDataNamespace): """Namespace object for dataset access under `.data`.""" - __slots__ = ("_parent",) - - def __init__(self, parent: "DepMapData") -> None: - object.__setattr__(self, "_parent", parent) - # DepMap main datasets Model: pd.DataFrame OmicsExpression: pd.DataFrame @@ -100,42 +96,6 @@ def __init__(self, parent: "DepMapData") -> None: CRISPRGeneDependency: pd.DataFrame CRISPRGeneEffect: pd.DataFrame - def __getattr__(self, name: str) -> Any: - if name in self._parent._datasets: - return self._parent._datasets[name] - if name in self._parent._paths: - raise AttributeError( - f"Dataset '{name}' is available but not loaded. Call `.load('{name}')` first." - ) - raise AttributeError(f"No dataset named '{name}' defined.") - - def __setattr__(self, name: str, value: Any) -> None: - if name == "_parent": - object.__setattr__(self, name, value) - return - self.add(name=name, dataset=value, overwrite=True) - - def __dir__(self): - return sorted( - set(super().__dir__()) - | set(self._parent._paths) - | set(self._parent._datasets) - ) - - def add(self, name: str, dataset: Any, overwrite: bool = False) -> None: - """Add a dataset to this namespace.""" - if not name or not isinstance(name, str): - raise ValueError("Dataset name must be a non-empty string.") - if not name.isidentifier(): - raise ValueError( - f"Dataset name '{name}' is not a valid Python identifier for attribute access." - ) - if name in self._parent._datasets and not overwrite: - raise ValueError( - f"Dataset '{name}' is already loaded. Pass overwrite=True to replace it." - ) - self._parent._datasets[name] = dataset - def __init__(self, data_dir, version=LATEST_VERSION): self.data_dir = data_dir self.version = version From 69d9dbd11d6ad33d8750b67cf7f1938fcb4a13d6 Mon Sep 17 00:00:00 2001 From: Abolfazl Arab Date: Sun, 24 May 2026 19:20:33 -0700 Subject: [PATCH 6/9] Rename cancer_database.py to _database.py --- candi/data/{cancer_database.py => _database.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename candi/data/{cancer_database.py => _database.py} (100%) diff --git a/candi/data/cancer_database.py b/candi/data/_database.py similarity index 100% rename from candi/data/cancer_database.py rename to candi/data/_database.py From d0a83e5cf1f9101f09b9caf2a7555a5ff348822c Mon Sep 17 00:00:00 2001 From: Abolfazl Arab Date: Sun, 24 May 2026 19:20:50 -0700 Subject: [PATCH 7/9] Fix import statement formatting in __init__.py --- candi/data/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/candi/data/__init__.py b/candi/data/__init__.py index 69f1c20..3840985 100644 --- a/candi/data/__init__.py +++ b/candi/data/__init__.py @@ -1,2 +1 @@ from .depmap import DepMapAPI, DepMapData -from .cancer_database import CancerDataNamespace \ No newline at end of file From efbf226069652621bb298d5b82001224c696d4c5 Mon Sep 17 00:00:00 2001 From: Abolfazl Arab Date: Sun, 24 May 2026 19:21:14 -0700 Subject: [PATCH 8/9] Change import path for CancerDataNamespace --- candi/data/depmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/candi/data/depmap.py b/candi/data/depmap.py index 6d8ed23..931a42e 100644 --- a/candi/data/depmap.py +++ b/candi/data/depmap.py @@ -5,7 +5,7 @@ import pandas as pd from tqdm import tqdm -from .cancer_database import CancerDataNamespace +from ._database import CancerDataNamespace LATEST_VERSION = "26Q1" FILES_URL = 'https://depmap.org/portal/api/download/files' From 137df330f8d05265171e657da4d8cfdf0f513ba6 Mon Sep 17 00:00:00 2001 From: Abolfazl Arab Date: Sun, 24 May 2026 22:10:34 -0700 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- candi/data/_database.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/candi/data/_database.py b/candi/data/_database.py index 776a90e..ee99059 100644 --- a/candi/data/_database.py +++ b/candi/data/_database.py @@ -22,7 +22,7 @@ def __setattr__(self, name: str, value: Any) -> None: if name == "_parent": object.__setattr__(self, name, value) return - self.add(name=name, dataset=value, overwrite=True) + self.add(name=name, dataset=value) def __dir__(self): return sorted( @@ -39,8 +39,17 @@ def add(self, name: str, dataset: Any, overwrite: bool = False) -> None: raise ValueError( f"Dataset name '{name}' is not a valid Python identifier for attribute access." ) + if name in object.__dir__(self): + raise ValueError( + f"Dataset name '{name}' conflicts with an existing namespace attribute." + ) if name in self._parent._datasets and not overwrite: raise ValueError( f"Dataset '{name}' is already loaded. Pass overwrite=True to replace it." ) + if name in self._parent._paths and not overwrite: + raise ValueError( + f"Dataset '{name}' is already defined as an available built-in dataset. " + f"Pass overwrite=True to replace it." + ) self._parent._datasets[name] = dataset