From e616b3e65e922a8149cdfba7090a07366a09fb34 Mon Sep 17 00:00:00 2001 From: Longlong Yao Date: Mon, 27 Oct 2025 11:20:26 +0800 Subject: [PATCH] librocdxg: use shared GPU memory as vram on small APU Signed-off-by: Longlong Yao Signed-off-by: Flora Cui --- openclose.cpp | 9 +++++- thunk_proxy/libthunk_proxy.a | Bin 307454 -> 307518 bytes topology.cpp | 7 +++- wddm/device.cpp | 61 +++++++++++++++++++++++++---------- 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/openclose.cpp b/openclose.cpp index d903e2aa44..2435c4fc94 100644 --- a/openclose.cpp +++ b/openclose.cpp @@ -145,7 +145,14 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { device = get_wddmdev(j+1); if (device == nullptr) return -1; - total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4; + /* + * For APU, use non local memory(shared GPU memory) as GPU memory, + * because it has small local memory + */ + if (device->IsDgpu()) + total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4; + else + total_local_size += wsl::AlignUp(device->NonLocalHeapSize(), align) * 4; } local_heap_space_start_ = 0; diff --git a/thunk_proxy/libthunk_proxy.a b/thunk_proxy/libthunk_proxy.a index 4b1ed96afcf2ae5581e9116c500b9c1b9f3881b7..afc99652dfda495d83179cb015751046c282f23d 100644 GIT binary patch delta 9386 zcma)>dwf$>w#ReM3DWfAwV^_jmO=vv6mTFY0y<2nI%@GnR3MR8qe7_&Hpo+iG(1~C zY_QT*bhK4baeUAj27-6S&{0RYdasR-A-#$a7(a@SkvMRbkB{{JR@Oey^smY113PErpa6Zx&T%reGaHm1QlYou1Sr)E}D!L83)ESBYyHhhq`eh0plZ&`9l zPB?U-C2#!)JY5%Cp<&Eh|1rRAi@6=_!zL{6@+lHM>Zg(n2Id7K*tQlt-oV{5&vfMPC*K2R=*|zpQT^qC~u;Hy9T5QkYEmv4C*`Ms} zym`~Nn@*kd9PR9!v~_P_g2jSa;26t4$G5}B*M;h%p?YnFzah2X9{v6cZHz;EH9DeG z`+B$*Ajfchxo*j)$Z-9zRu-K%LZ7T@!D#eSys2lhK0QWT zEcup0mN?a4qdSUH7m4^n87vhFTk@#w8oi*GKH5b-2XE1-bN+Y z!h2ldaSAV0*ls89wR(Z>vPY|~)i?IiTBE0?=m%D7&(iJ>;MS?^blAO(e?Ozz{JEQs z0qcA6$om1zOYLO;5aSiS8sF0kv;#EpL(mhw$j#F)sG6tW^vWn}-%&G9r=eawjRI{p zwexgtKE=O=auap(^rL)g`v}udXw*lT_PUtdJe@!lJe_ti1wO{~0jlTePZv{JAEQ9O 
zpm)Ydcl9RwN!>xG7|!aQG1ZrGL}ht`_{?D?-$^LJ<;?Ja4XrQJOB z_n|TyJU`Tj+OQP8qYni>#rSC-O7Z+mA1T031-x+G@1RsM)-;!@55s!CgChS2@i&eP zecef}c1&G`W!xHlT$yIj zi(C}sd5J4ixuUREfpKnfl~?1cj)VJf@cnvsH@cY_+Sd6klNrzTqw#r0?(f*Q(ne@~ZCkAhyU#ws<|sj*d!ZEE~R zjX9T!-BoIwDWi6p+K<8eYU<>HGJ~G$rZ69Nqt!TDja$_CCwF5K_n#%g30Hdjur-Mdk7O0OLO-WwpI+}bOWy-rOlc`ooHOKV? z?K`2{bAuAVQcys_6Nosl;C#f(@inaY$O38sFTAQ7?}eV`&vn#(0$q2TD>Jij!fZZ7 z;aSuPtP?iuBAjhL8ubq}>hu55?Y3=7rJ5>Oc9><~DOoMmLsr<(6F7*eh%~F34a)6aJ0s04DEs2#^nCS zI6B(T2+H?*49EH#JXDuthE5skU71OnY{ggKtxVW##?cH#ICNf@9EOKWzyq~HWuTs{oZn&J98P8F%RUx~?ZfAU{ z!tQR?Ez~*KSgtLIE+1ml8`{ih&*4UAPT>NF=;d?y@G&d7AX;*{af6{vh|U{plxP@h z$I1A@co`2~X}sWszv|nKahkS(c2}dR7tnz_I0{V+W@eM8WQOXXF`4#FG3?MkIK}uv z$9354Gj7222cJ;`^XT$B4Ln3Gpo7zmQ5b%hj-oH1+8LQyRE95RWL8iy)97Rm6%|GW zEDux|Wc@ci9IRxwQQtfx+Qmcle4{6O_&1lrRs14oG%F9|D7HAWvi8LWp8F<5hb}Xg zbLE;JG!Am*<~?NGZ~?~MYm6U?3*+j+Z@ewL;wd`tl;N;h)Du{M@|tls|Nbe%Ug%b` zx^5a=Uj#}|RTd~vN2ij=IPGhWwC)&K{jaq(^ zrCnT@V6^#V9$V;zR#dz-+Vc+vk-q6N+IB|kPqA{VQ;Sog+&WQz?bi**_ZLw88O=_f zNmi%rwO`9BH42~6O7zi|#;I0EjuQ5y2!s@4#dwxXUumK6jn@9!;EamC)Iw!+WlJh3$^Amm-{m@O&4=dS4$_L9L*|IdtT9j;{ z=y`6oI;rtyYbk~J+hP6|!#A7zHxk2)&$57$^-#9AKo(Ro6*mN#KE9TkZb6>mY?cPo zyyUpmI?)zUXp*=J-3nAi1Y}JDQEHsto0_@qX-2K&m<;6dWTDVxphHBA0@*0dC`k@4 zP)jxnd4bLnF-m5mG>~quBj;^akJU9XJJ@YtrFGQAERfC8U|JbDZpX@6fcSHzTG{PD zej-N6Y?NjcB*z^>WCNx6uMI!rWv)6V+v4PHVREaiq1{cGzGLVJkx;M$Ty9{W}_I8p0k-c zrXj5RTJgADnfYMmC7%ywe^bbs**p{j%8e`8+nVgYH0w8!!w>zI>%>*FxC;4!f<%la zUN0zLT&00*0VPxFo;#slPQE*#-kGf)1Io?6L0r`6bT#k$G=#cqD>wsrDKQ;=R5b;?V!skSd*HN|Lt|%HUAdO2qQ3V6Sz2fY z^jnB#Aff#;gsrF4X~h0zhP6=7X&~oJRIqHOuqV{AXL4SR)Wj^Q*u%oo%h{O*+4C`#p)_$H5({S#OPy%@Pq9PchGEJRE4PDpstsv z0vTAE$X|gBya9o6@f8C@qa`Zf%XOEarGh*Gpk3q(a0UvQJqTorD_K7kI)KnACHspm z_4M2YA06bo%Q`W)b&j|XsQ-~nNGiT=L1(2pzGVWppDeO&EI=(jEr{i*6| zSMt?+ikd2M^?EE7cBQI$C47}B*=y#y=fM{?)jX`*vO@H~QY^;oEG58fsg=UEsdZOb zUAD+WB9rdWPq-TWRtHfPTm@DMT4| zWE;g-8v6W@u>`)_6xv}n+!DSrf2O7-C{^Hf@%5#GX+V_9u@r{h1H#NM4lRYD>vd{c z%Ij(q=vF&>DevsRimX@_!n2GE5hMRHL~^|;@Hw;R7&;G72cRwfmcX>C((+6xe9O7I 
ze-m_}X$Pje+NfhWoF?DShMpBLJVd?~C~xYZu+LO^83if9D0xWGaTV@CxU{@M{s%dg z{}OggnSpW7JqWW_ej|pDLY|eJ$(!U~39|^l;5yG@V7LH@l`sqPOCx+fRM8#+DkI-R zK;HimWCmjtrvxMSds)b{3J8s}3dr7`g<_0Slwg!n$c(ibC``W9K=HU(eXkYu^CAJn;xB@`ANrmn))KRgHSN9S5*CCwu zD{(noJq^VG`3}7fhT)X3`Ko0->p9O3@~=l(zWPpJq1wIcv2pDG64{^B#!16~x98}JT%Doj}qAtb%aBfn^Lm1Y8QDTnHBjW2p%3M z`gU`*%&^0lMXnNNHE*liII*9|zYU4CTqE!*(-4r))NQx5lrmg!M2+*lt4b59ah zYVI*+K}s+SOcqvaCfb02_6x}0fPkrK-2wvgb!lLy)3cp=9s|Rp`(x}hAS}l`Lzv;g zfmx9M-hq$HOmsUK&dBYkZe+eN^B4!xa~#yM9own;K4E>O@?#rO<%p8Gd&tQWYJ@NR z@NYz%wxH<0qvE8&bayd12@8uX5%`vZA%Y<`86k-&nZNd0EX5M&dqSX~k}XkBG@d4Q zRzm(JIQwj^IIA&e%^9tL_BFA^MiKT?M|v6-D4k;mFLSf75$a^v0fZM1jM@~+QN6Uo z>cW+BJdOn=6&hus_xZ zkA){WKsWsunC)b?ewy`hIz9z`%T|eP`VTz?{c*qX@Rb)>Kx8*SMw_w38#mt0afVQe^gESkAA4blnVG!ftUWXu+N9{P3Udmj2Og-rbzn5`;XKh664$?*d8!`<{lFF^lLwtfsU+&vSlpX{dZ z*#-U5N3$;gWV&+%HSNMf!FVuvQHhs6cu@()KLuXo*viaeReShFFti?+E!-$BGh4re z(m>~A@?M!dzwaVbdZDf|_5OKKfZATP7MwE|RXx;Ath5u|#HQnyx&%`R#Jb9KJ|K}O z%hsyomCW-p4x1JwOPPlaguK|7QHU0j#m3DpWrpMN6%-=alvRk(EAZb<7V9cgJ&|Tv zSlPPx(8G(BS8;!C>FUox%UHIns|@Gns|de>60ahBt3o5yjb^viWgKnN8jkPpBX9ge zyAddPE|46WkmXu-NaC*H^annn(-7kR>}bWG4EXm=D|SQ6kD@bj8Q(1WYrUwZ)#}Kt On_^_{il5HOi2omGKY0NF delta 9307 zcma)>dvsHEw#ReM3DS0PX^YaN<!mrg^sfPTYNk3_?lPiW3Sd5%d29;P8e6ZjHR)= z+l{{sHoR0h*j#2N^C>ylJY?j>c3fhPH;nSwnM=(k6XX6;v(hvIvEnPt#=QKcTogWC zE_XP3I1V@>)O4-sDoS)0aW@$p(-kMXH(X~H6t*fZRvc68pwjD1S7BbR$d@VZdxba; z(RARVh~nXjmnd${r4YQEZMm_X*O|5XMop}AlDY3eV-cPF3~o4a`4-`Pp2mIEb-JK9zrB78vzZ&G@x^3h*>WyLh@cKdo)NOGkM+um>f(S_Q@s z%KaSEt9nr0_fVcoWjuYpN5dDG{f-u3W*n2D8#OW;cs@nG1iY4-c>3Y_RMXQcFb|#2 zCT83D6k^l!sq_ob%Pye6=MZ17W7P$e#N1dx^^9vSpa@U5&_15NdVyqdfKKwXq$ky6 z!}YkH4NdT|vnNHq#`sE4^78z>o>KpZ1?jrY5*K;AHi`rUH$Vh5WtLwC$N?TwZ?L~f2^Nf!3dsBd|mh={{J9>-td$f<$&+2p{ox~JZ zEIk}g52mv(B{7gcy0WpIK3Fm?}{nP^OI1+ z+9}%PIoE^_IyB+1hstr03myH0J2A|`*0Go13R>_bNAK4FFUC%4#H{cwRpyR1f};rhctvbGNo zq9oRO(Xm12${f<6juXoNrvoqTz|X+X5nR8Na;~JQLnU2k`VoZdRw7#}za$;D=(Un_ z;f8i__j3tVBvIe!-V&ZS~^ 
zsf@;-D7O{PZ=y0D=A;Mn%Th}4VTbcRJr2=vijHf&RNe~yjF&2KQHoykN+n!)vP0o- zd#Q#mS`+N}G&qKN4_r$xwej>PZ@NG;?fSVYKg<* zb37DUh{J6VEnw}TdZBFXEvj8cjf}^2!apjmq$Xgh{WIb(RM`S*XW4F+y{59URQe5i zKPfi)8?$L>;YazL8SV^ZNax6rKcZ5%wd@!CTxT5H=e7oQF}hKGUu!t^?P~?(dp(BY zVvC1eGL(7JvnCm)_e;-C%23@uJRxeMGJe|D`o`lsgdkpXyv!<1aWsWdp5c z#;90qpjB@fm&Qg9w%W4_N4dmX5AH5LJn-MB*z}>+Xv^pp+cC^4HZblVF5_QE$k_F2 zYlj>DcHLqPH;hqqasd|gC~BX;QK&kQp4CrG54!@^csf1N%7cE_N!DLYT!)jD)@V$J zS6S6CkHv1aa1%9(x=yi5F$|xA-jAaFGCL!~psDE!8mC(A?4fbGH651i(=BrTiypep zVYe}wW5qgn*fr1U${zm3Qn-tQ7Fvz!VK}80r7O!_Y~jAITWro!YZ-U$=n88;cW%f1 z*68yvo?K;}>0KDnd#n-J-0yQ8Pf+{Qma8bK*xw1ab|-J6Rgmq_Te~_cZp6L9>_#iE zFr>0Qoopgh51pb`#jTw%?gA5y))6O;qQ>VfzN3w~w_C#u+|rJJ!CKF^vdufWGv%?P zFJq)5FI!pkL)5CmedFlgTSVrqSJ0=YjJ}i_<8&J#a*cJCnHzq!HUF&wYC2`)QE;r& zoxR~#QmH|SQ%13QxufAmrz=Z^T`376rC2eNAv0GxsOFAyngi0B<`M@b#yed`EroyM z^kuiIO@aFP4g5H8GtVEfAm2Er+ib0+s&UZwDpUP9SayZVx~mVC@dE*WROY)8vVh8P z8^JjTSr$>5#)?BmsIP=l{Ou5b^W6kJ?=|ADi`ufRTxA*~4w-qjmRfE?o)wwQcQYEC zN0m1_%d-87#)_*rqY`Qcaz=s3JHotxj&l2b-0I>GFhD2iS^XN%v+Wj?H0InP~|PCEC9rxky;re zH#IYgW+LANAUugp0BXxbaYkdPnUS|V?ay(mIP^^f@>AtRpim}?1DP##)G`sxZ&UUm zwf$tcXxJpDCp+#Qh$cD9X!0b!-Yl3)uF3H2xmI-Ns_tZ<8X`uaOq2pL z4{RWJCBizc6SHk^A)C-w1^p_jtb%^vdU4e#uHuZm z)XXUN20=ZNck@XXaS( zKRbsmKV|fxcW!eQW`8h2gmW~17+{IHreGb7n27aIF%oOG{U-!nI>qV9NkYb-H_wyO z;!~jSr)KD9*G&<&mb@JMnyJo0^S~oiH5DDqn=0%HEqf~GwT)VsMU`z5=9>nVY@n)X zVCB=q?g6y}!8SD=EHGU|3!*KYX<*S|TXu2bxR7C@@2;bWx^&vLh<%qoHRYbV6l& zP3dQF27H8xW;n}oYG#U$fTopVR!*)vIP@%GA=wB^?|`qG*&-`->b1E8`e+vGN9PH< zLpRQy(D&RevI%zVJE89(x|3ti7iPC4#Vks$necVsUSYTs=1sVCCVV*-i0oO-H9j+) zYcuQb)SKc9HTEp%Csj7pZuuxr>JE%uHF~QMrzOJz*pW9(SOUXdk%bIQ_Vr$Nquqiow5|OJ>;5;IEj^Fw?*ro zi^Io%zsO{Dn8!VonCNs-GxT%34+vXn=X@7@;gGuvEc&3ZAMAdCnQakjxeFULQZ3M} zN%-c$Fie&6ki?!R8ll;z(jaFm@Ups}1> z_i~z>gxOo^ULY(*pd8O;LA$!jvQ9C>4qO1U|+u(!WUI1qc_|ChSDE+5vKT z0WQ5!*nL^L?E+w^k6G&uVK?WfoiNuz#Phx;Y?j7b2(vvzKsgabXYK01%oA^r`#vng z$eUs}M(yr{9m=~8F547M7L;OCL#{<|S+QHJHtL2BE`rOmyD7W~F28(Rpi5Tv=|xzL zjz5U3R($yu!x!!{7QcGx-4C;Jetm?`L5+4l 
zkb{Vk=VL*3Fh(JAJpknRBm)H>0K&py)S8J>jH2Xv5Gc_kR(7lhfy#*(MUsL}YhgfU z@_lj#x#&-Y`8CBL%$5@c(SOHhf^OHPm|})#)nNWZ!W!*mTFs68fWp=A>G(?Em2y+? zbG5UO&(j*DQm$yFhO2>D+s72H!6i*7>z-aj#nfJdCkqd`R>3a$wYbz0f~#P6_7e)P zf?e{6uzvPo0m%0=t6_KQFBz#kxEiTc5i#<9D@apGt%en@z#3E+RdlDNN5vYh?jVKN zAe`qru^X(nLn%Q1iCzoCiZ)^Mbjb$Sa-Pi;UW>N8{iDD_-MwqEaavXOhHf0+Iyl4j zT?c28Q=FT555_U;eYhL7{UGCF{Z3 z4pY^7Bp3qYPnBGpI2iihydFN=6pgdv)q#zm$~v%ehmkg$uSLh1RZ%locGM8|vbgkZ z018p%22Rry)NY$G+eyu7<`j0XoX6(jaw@;s>7vSwycW6(d_sQ?H=>V$T#>zEAN(8P z48JoEA&{p~ndTmR2n^ehS+bX~tL!s|8P?n;u)IFP4%-)V6BvFln6>p4)SVy*^C|^_@2U>v3kk}2yE75Qh>bd9)_W(L|Cnz=)+)Gt<0i>Gg%7EZ2OVi z_3ZRAfwS#Q>tTq)upZrdVTiCRb=B45H=|W$hxG^#p@WH1(Oa#%C>TmtL?HyP7%nW& zu9g{g*cLGV)xw>m`~g2V#X3cT7j1mrXI5w7TZVb|MjdIStVtjru^g_YQQ zjG32QTfw~Jg{`*}-HL$8E)?F1fQiYS0s``Nd6b>rmgzM3DA-&gW?sLrEc*;$h8u@% zVCDR;4}3g!qT9f5MuKJg=LxfqaUkf>_=r2%i>e5P`4Ynye5LIJ!t4<$bgK`@ zc(Xr=n0`fzR6qD6^jk9Z8DtK zXQsaIY3Sp0d>Z_?|V)+2sww(p$fnd@mu4EtB`l z6znKdy?95NzK6ch-|&*N;1_GrG=rH^)osp{+FpP6|D&fm%1k~Tk>$(KyH+z-SrD=u zzslO|lLkVr^ksCxPf}^pUZSsn;cR>bUGRo8x)2BIEK7Bi*%$a#&R1o<4YyY0|+CRPvfg-;Il2aG5oP;eK zOh5j_FAk&dT0xaj{NI2dZlc%<{(neuLzeO{MPDPI_C=kpoG}xv^iSf~Gt=V#19X*i A=Kufz diff --git a/topology.cpp b/topology.cpp index ee23adb908..2db712e341 100644 --- a/topology.cpp +++ b/topology.cpp @@ -642,7 +642,12 @@ static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id, return ret; props.HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE; - props.SizeInBytes = device->LocalHeapSize(); + + if (device->IsDgpu()) + props.SizeInBytes = device->LocalHeapSize(); + else + props.SizeInBytes = device->NonLocalHeapSize(); + props.Width = device->MemoryBusWidth(); props.MemoryClockMax = device->MaxMemoryClockMhz(); diff --git a/wddm/device.cpp b/wddm/device.cpp index 80e3dc4d62..f51af85404 100644 --- a/wddm/device.cpp +++ b/wddm/device.cpp @@ -152,37 +152,64 @@ bool WDDMDevice::GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE segment_type, return false; } +/*Local heap(dedicated GPU memory) includes visiable 
heap and invisible heap. + *Non local heap refers to shared GPU memory and it is system memory. + */ uint64_t WDDMDevice::VramAvail(void) { D3DKMT_QUERYSTATISTICS stats; NTSTATUS ret; uint64_t usedVis = 0; uint64_t usedInv = 0; + uint64_t usedNonLocal = 0; + uint32_t segmentId = 0; // wait fence complete uint64_t value = page_fence_value_.load(); if(!CpuWait(&page_syncobj_, &value, 1, false)) return HSA_STATUS_ERROR; - // local cpu-visible memory - memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS)); - stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT; - stats.AdapterLuid = adapter_luid_; - stats.QuerySegment.SegmentId = 0; - ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats)); - if (ret == 0) - usedVis = stats.QueryResult.SegmentInformation.BytesResident; + if (IsDgpu()) { + // local cpu-visible memory + if(!GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE_MEMORY, segmentId)) + return HSA_STATUS_ERROR; - // local invisible memory - memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS)); - stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT; - stats.AdapterLuid = adapter_luid_; - stats.QuerySegment.SegmentId = 1; + memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS)); + stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT; + stats.AdapterLuid = adapter_luid_; + stats.QuerySegment.SegmentId = segmentId; + ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats)); + if (ret == 0) + usedVis = stats.QueryResult.SegmentInformation.BytesResident; - ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats)); - if (ret == 0) - usedInv = stats.QueryResult.SegmentInformation.BytesResident; + // local invisible memory + if (device_info_.local_invisible_heap_size) { + segmentId++; + memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS)); + stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT; + stats.AdapterLuid = adapter_luid_; + stats.QuerySegment.SegmentId = segmentId; - - return LocalHeapSize() - usedVis - usedInv; + ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats)); + if (ret == 0) + usedInv = stats.QueryResult.SegmentInformation.BytesResident; + } + 
+ return LocalHeapSize() - usedVis - usedInv; + } else { + // APU - NonLocal memory + if(!GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE_SYSMEM, segmentId)) + return HSA_STATUS_ERROR; + + memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS)); + stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT; + stats.AdapterLuid = adapter_luid_; + stats.QuerySegment.SegmentId = segmentId; + ret = DXCORE_CALL(D3DKMTQueryStatistics(&stats)); + if (ret == 0) + usedNonLocal = stats.QueryResult.SegmentInformation.BytesResident; + + return NonLocalHeapSize() - usedNonLocal; + } } bool WDDMDevice::CreateDevice(void) {