Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3: - Add new thermal sensors for vega asics - Various RAS fixes - Add sysfs interface for memory interface utilization - Use HMM rather than mmu notifier for user pages - Expose xgmi topology via kfd - SR-IOV fixes - Fixes for manual driver reload - Add unique identifier for vega asics - Clean up user fence handling with UVD/VCE/VCN blocks - Convert DC to use core bpc attribute rather than a custom one - Add GWS support for KFD - Vega powerplay improvements - Add CRC support for DCE 12 - SR-IOV support for new security policy - Various cleanups From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
This commit is contained in:
@@ -21,7 +21,7 @@
|
||||
*/
|
||||
|
||||
static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
0xbf820001, 0xbf82012b,
|
||||
0xbf820001, 0xbf820121,
|
||||
0xb8f4f802, 0x89748674,
|
||||
0xb8f5f803, 0x8675ff75,
|
||||
0x00000400, 0xbf850017,
|
||||
@@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
0x8671ff71, 0x0000ffff,
|
||||
0x8f728374, 0xb972e0c2,
|
||||
0xbf800002, 0xb9740002,
|
||||
0xbe801f70, 0xb8f5f803,
|
||||
0x8675ff75, 0x00000100,
|
||||
0xbf840006, 0xbefa0080,
|
||||
0xb97a0203, 0x8671ff71,
|
||||
0x0000ffff, 0x80f08870,
|
||||
0x82f18071, 0xbefa0080,
|
||||
0xbe801f70, 0xbefa0080,
|
||||
0xb97a0283, 0xbef60068,
|
||||
0xbef70069, 0xb8fa1c07,
|
||||
0x8e7a9c7a, 0x87717a71,
|
||||
@@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
|
||||
|
||||
static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||
0xbf820001, 0xbf82015d,
|
||||
0xbf820001, 0xbf82015e,
|
||||
0xb8f8f802, 0x89788678,
|
||||
0xb8f1f803, 0x866eff71,
|
||||
0x00000400, 0xbf850037,
|
||||
0x866eff71, 0x00000800,
|
||||
0xbf850003, 0x866eff71,
|
||||
0x00000100, 0xbf840008,
|
||||
0xb8fbf803, 0x866eff7b,
|
||||
0x00000400, 0xbf85003b,
|
||||
0x866eff7b, 0x00000800,
|
||||
0xbf850003, 0x866eff7b,
|
||||
0x00000100, 0xbf84000c,
|
||||
0x866eff78, 0x00002000,
|
||||
0xbf840001, 0xbf810000,
|
||||
0xbf840005, 0xbf8e0010,
|
||||
0xb8eef803, 0x866eff6e,
|
||||
0x00000400, 0xbf84fffb,
|
||||
0x8778ff78, 0x00002000,
|
||||
0x80ec886c, 0x82ed806d,
|
||||
0xb8eef807, 0x866fff6e,
|
||||
@@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||
0x8977ff77, 0xfc000000,
|
||||
0x87776f77, 0x896eff6e,
|
||||
0x001f8000, 0xb96ef807,
|
||||
0xb8f0f812, 0xb8f1f813,
|
||||
0x8ef08870, 0xc0071bb8,
|
||||
0xb8faf812, 0xb8fbf813,
|
||||
0x8efa887a, 0xc0071bbd,
|
||||
0x00000000, 0xbf8cc07f,
|
||||
0xc0071c38, 0x00000008,
|
||||
0xc0071ebd, 0x00000008,
|
||||
0xbf8cc07f, 0x86ee6e6e,
|
||||
0xbf840001, 0xbe801d6e,
|
||||
0xb8f1f803, 0x8671ff71,
|
||||
0xb8fbf803, 0x867bff7b,
|
||||
0x000001ff, 0xbf850002,
|
||||
0x806c846c, 0x826d806d,
|
||||
0x866dff6d, 0x0000ffff,
|
||||
@@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||
0x8f6e8378, 0xb96ee0c2,
|
||||
0xbf800002, 0xb9780002,
|
||||
0xbe801f6c, 0x866dff6d,
|
||||
0x0000ffff, 0xbef00080,
|
||||
0xb9700283, 0xb8f02407,
|
||||
0x8e709c70, 0x876d706d,
|
||||
0xb8f003c7, 0x8e709b70,
|
||||
0x876d706d, 0xb8f0f807,
|
||||
0x8670ff70, 0x00007fff,
|
||||
0xb970f807, 0xbeee007e,
|
||||
0x0000ffff, 0xbefa0080,
|
||||
0xb97a0283, 0xb8fa2407,
|
||||
0x8e7a9b7a, 0x876d7a6d,
|
||||
0xb8fa03c7, 0x8e7a9a7a,
|
||||
0x876d7a6d, 0xb8faf807,
|
||||
0x867aff7a, 0x00007fff,
|
||||
0xb97af807, 0xbeee007e,
|
||||
0xbeef007f, 0xbefe0180,
|
||||
0xbf900004, 0x87708478,
|
||||
0xb970f802, 0xbf8e0002,
|
||||
0xbf88fffe, 0xb8f02a05,
|
||||
0xbf900004, 0x877a8478,
|
||||
0xb97af802, 0xbf8e0002,
|
||||
0xbf88fffe, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0xb8fb1605, 0x807b817b,
|
||||
0x8e7b867b, 0x807a7b7a,
|
||||
0x807a7e7a, 0x827b807f,
|
||||
0x867bff7b, 0x0000ffff,
|
||||
0xc04b1c3d, 0x00000050,
|
||||
0xbf8cc07f, 0xc04b1d3d,
|
||||
0x00000060, 0xbf8cc07f,
|
||||
0xc0431e7d, 0x00000074,
|
||||
0xbf8cc07f, 0xbef4007e,
|
||||
0x8675ff7f, 0x0000ffff,
|
||||
0x8775ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x00807fac, 0x867aff7f,
|
||||
0x08000000, 0x8f7a837a,
|
||||
0x87777a77, 0x867aff7f,
|
||||
0x70000000, 0x8f7a817a,
|
||||
0x87777a77, 0xbef1007c,
|
||||
0xbef00080, 0xb8f02a05,
|
||||
0x80708170, 0x8e708a70,
|
||||
0xb8f11605, 0x80718171,
|
||||
0x8e718671, 0x80707170,
|
||||
0x80707e70, 0x8271807f,
|
||||
0x8671ff71, 0x0000ffff,
|
||||
0xc0471cb8, 0x00000040,
|
||||
0xbf8cc07f, 0xc04b1d38,
|
||||
0x00000048, 0xbf8cc07f,
|
||||
0xc0431e78, 0x00000058,
|
||||
0xbf8cc07f, 0xc0471eb8,
|
||||
0x0000005c, 0xbf8cc07f,
|
||||
0xb8fa1605, 0x807a817a,
|
||||
0x8e7a867a, 0x80707a70,
|
||||
0xbef60084, 0xbef600ff,
|
||||
0x01000000, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611c7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611b3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611b7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611bba, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611bfa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611e3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xb8fbf803,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611efa, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611a3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611a7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xb8f1f801,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611c7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0x867aff7f,
|
||||
0x04000000, 0xbeef0080,
|
||||
0x876f6f7a, 0xb8f02a05,
|
||||
0x80708170, 0x8e708a70,
|
||||
0xb8fb1605, 0x807b817b,
|
||||
0x8e7b847b, 0x8e76827b,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbef20174, 0x80747074,
|
||||
0x82758075, 0xbefc0080,
|
||||
0xbf800000, 0xbe802b00,
|
||||
0xbe822b02, 0xbe842b04,
|
||||
0xbe862b06, 0xbe882b08,
|
||||
0xbe8a2b0a, 0xbe8c2b0c,
|
||||
0xbe8e2b0e, 0xc06b003a,
|
||||
0x00000000, 0xbf8cc07f,
|
||||
0xc06b013a, 0x00000010,
|
||||
0xbf8cc07f, 0xc06b023a,
|
||||
0x00000020, 0xbf8cc07f,
|
||||
0xc06b033a, 0x00000030,
|
||||
0xbf8cc07f, 0x8074c074,
|
||||
0x82758075, 0x807c907c,
|
||||
0xbf0a7b7c, 0xbf85ffe7,
|
||||
0xbef40172, 0xbef00080,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xbee80080, 0xbee90080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xe0724000, 0x701d0000,
|
||||
0xe0724100, 0x701d0100,
|
||||
0xe0724200, 0x701d0200,
|
||||
0xe0724300, 0x701d0300,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8fb4306, 0x867bc17b,
|
||||
0xbf84002c, 0xbf8a0000,
|
||||
0x867aff6f, 0x04000000,
|
||||
0xbf840028, 0x8e7b867b,
|
||||
0x8e7b827b, 0xbef6007b,
|
||||
0xb8f02a05, 0x80708170,
|
||||
0x8e708a70, 0xb8fa1605,
|
||||
0x807a817a, 0x8e7a867a,
|
||||
0x80707a70, 0x8070ff70,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xbefc0080,
|
||||
0xd28c0002, 0x000100c1,
|
||||
0xd28d0003, 0x000204c1,
|
||||
0xd1060002, 0x00011103,
|
||||
0x7e0602ff, 0x00000200,
|
||||
0xbefc00ff, 0x00010000,
|
||||
0xbe800077, 0x8677ff77,
|
||||
0xff7fffff, 0x8777ff77,
|
||||
0x00058000, 0xd8ec0000,
|
||||
0x00000002, 0xbf8cc07f,
|
||||
0xe0765000, 0x701d0002,
|
||||
0x68040702, 0xd0c9006a,
|
||||
0x0000f702, 0xbf87fff7,
|
||||
0xbef70000, 0xbef000ff,
|
||||
0x00000400, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8fb2a05,
|
||||
0x807b817b, 0x8e7b827b,
|
||||
0x8e76887b, 0xbef600ff,
|
||||
0x01000000, 0xbefc0084,
|
||||
0xbf0a7b7c, 0xbf840015,
|
||||
0xbf11017c, 0x807bff7b,
|
||||
0x00001000, 0x7e000300,
|
||||
0x7e020301, 0x7e040302,
|
||||
0x7e060303, 0xe0724000,
|
||||
0x701d0000, 0xe0724100,
|
||||
0x701d0100, 0xe0724200,
|
||||
0x701d0200, 0xe0724300,
|
||||
0x701d0300, 0x807c847c,
|
||||
0x8070ff70, 0x00000400,
|
||||
0xbf0a7b7c, 0xbf85ffef,
|
||||
0xbf9c0000, 0xbf8200da,
|
||||
0xbef4007e, 0x8675ff7f,
|
||||
0x0000ffff, 0x8775ff75,
|
||||
0x00040000, 0xbef60080,
|
||||
0xbef700ff, 0x00807fac,
|
||||
0x8670ff7f, 0x08000000,
|
||||
0x8f708370, 0x87777077,
|
||||
0x8670ff7f, 0x70000000,
|
||||
0x8f708170, 0x87777077,
|
||||
0xbefb007c, 0xbefa0080,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f01605,
|
||||
0x80708170, 0x8e708670,
|
||||
0x807a707a, 0xbef60084,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611efa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611b3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611b7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611bba,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611bfa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611e3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xb8f1f803, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611c7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611a3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611a7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xb8fbf801, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611efa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0x8670ff7f, 0x04000000,
|
||||
0xbeef0080, 0x876f6f70,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f11605,
|
||||
0x80718171, 0x8e718471,
|
||||
0x8e768271, 0xbef600ff,
|
||||
0x01000000, 0xbef20174,
|
||||
0x80747a74, 0x82758075,
|
||||
0xbefc0080, 0xbf800000,
|
||||
0xbe802b00, 0xbe822b02,
|
||||
0xbe842b04, 0xbe862b06,
|
||||
0xbe882b08, 0xbe8a2b0a,
|
||||
0xbe8c2b0c, 0xbe8e2b0e,
|
||||
0xc06b003a, 0x00000000,
|
||||
0xbf8cc07f, 0xc06b013a,
|
||||
0x00000010, 0xbf8cc07f,
|
||||
0xc06b023a, 0x00000020,
|
||||
0xbf8cc07f, 0xc06b033a,
|
||||
0x00000030, 0xbf8cc07f,
|
||||
0x8074c074, 0x82758075,
|
||||
0x807c907c, 0xbf0a717c,
|
||||
0xbf85ffe7, 0xbef40172,
|
||||
0xbefa0080, 0xbefe00c1,
|
||||
0xbeff00c1, 0xbee80080,
|
||||
0xbee90080, 0xbef600ff,
|
||||
0x01000000, 0xe0724000,
|
||||
0x7a1d0000, 0xe0724100,
|
||||
0x7a1d0100, 0xe0724200,
|
||||
0x7a1d0200, 0xe0724300,
|
||||
0x7a1d0300, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8f14306,
|
||||
0x8671c171, 0xbf84002c,
|
||||
0xbf8a0000, 0x8670ff6f,
|
||||
0x04000000, 0xbf840028,
|
||||
0x8e718671, 0x8e718271,
|
||||
0xbef60071, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0xb8f01605, 0x80708170,
|
||||
0x8e708670, 0x807a707a,
|
||||
0x807aff7a, 0x00000080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0080, 0xd28c0002,
|
||||
0x000100c1, 0xd28d0003,
|
||||
0x000204c1, 0xd1060002,
|
||||
0x00011103, 0x7e0602ff,
|
||||
0x00000200, 0xbefc00ff,
|
||||
0x00010000, 0xbe800077,
|
||||
0x8677ff77, 0xff7fffff,
|
||||
0x8777ff77, 0x00058000,
|
||||
0xd8ec0000, 0x00000002,
|
||||
0xbf8cc07f, 0xe0765000,
|
||||
0x7a1d0002, 0x68040702,
|
||||
0xd0c9006a, 0x0000e302,
|
||||
0xbf87fff7, 0xbef70000,
|
||||
0xbefa00ff, 0x00000400,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8f12a05, 0x80718171,
|
||||
0x8e718271, 0x8e768871,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0084, 0xbf0a717c,
|
||||
0xbf840015, 0xbf11017c,
|
||||
0x8071ff71, 0x00001000,
|
||||
0x7e000300, 0x7e020301,
|
||||
0x7e040302, 0x7e060303,
|
||||
0xe0724000, 0x7a1d0000,
|
||||
0xe0724100, 0x7a1d0100,
|
||||
0xe0724200, 0x7a1d0200,
|
||||
0xe0724300, 0x7a1d0300,
|
||||
0x807c847c, 0x807aff7a,
|
||||
0x00000400, 0xbf0a717c,
|
||||
0xbf85ffef, 0xbf9c0000,
|
||||
0xbf8200dc, 0xbef4007e,
|
||||
0x8675ff7f, 0x0000ffff,
|
||||
0x8775ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x00807fac, 0x866eff7f,
|
||||
0x08000000, 0x8f6e836e,
|
||||
0x87776e77, 0x866eff7f,
|
||||
0x70000000, 0x8f6e816e,
|
||||
0x87776e77, 0x866eff7f,
|
||||
0x04000000, 0xbf84001e,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8ef4306, 0x866fc16f,
|
||||
0xbf840019, 0x8e6f866f,
|
||||
0x8e6f826f, 0xbef6006f,
|
||||
0xb8f82a05, 0x80788178,
|
||||
0x8e788a78, 0xb8ee1605,
|
||||
0x806e816e, 0x8e6e866e,
|
||||
0x80786e78, 0x8078ff78,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xbefc0080,
|
||||
0xe0510000, 0x781d0000,
|
||||
0xe0510100, 0x781d0000,
|
||||
0x807cff7c, 0x00000200,
|
||||
0x8078ff78, 0x00000200,
|
||||
0xbf0a6f7c, 0xbf85fff6,
|
||||
0xbef80080, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8ef2a05,
|
||||
0x806f816f, 0x8e6f826f,
|
||||
0x8e76886f, 0xbef600ff,
|
||||
0x01000000, 0xbeee0078,
|
||||
0x8078ff78, 0x00000400,
|
||||
0xbefc0084, 0xbf11087c,
|
||||
0x806fff6f, 0x00008000,
|
||||
0xe0524000, 0x781d0000,
|
||||
0xe0524100, 0x781d0100,
|
||||
0xe0524200, 0x781d0200,
|
||||
0xe0524300, 0x781d0300,
|
||||
0xbf8c0f70, 0x7e000300,
|
||||
0x7e020301, 0x7e040302,
|
||||
0x7e060303, 0x807c847c,
|
||||
0x8078ff78, 0x00000400,
|
||||
0xbf0a6f7c, 0xbf85ffee,
|
||||
0xbf9c0000, 0xe0524000,
|
||||
0x6e1d0000, 0xe0524100,
|
||||
0x6e1d0100, 0xe0524200,
|
||||
0x6e1d0200, 0xe0524300,
|
||||
0x6e1d0300, 0xb8f82a05,
|
||||
0x866eff7f, 0x08000000,
|
||||
0x8f6e836e, 0x87776e77,
|
||||
0x866eff7f, 0x70000000,
|
||||
0x8f6e816e, 0x87776e77,
|
||||
0x866eff7f, 0x04000000,
|
||||
0xbf84001e, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8ef4306,
|
||||
0x866fc16f, 0xbf840019,
|
||||
0x8e6f866f, 0x8e6f826f,
|
||||
0xbef6006f, 0xb8f82a05,
|
||||
0x80788178, 0x8e788a78,
|
||||
0xb8ee1605, 0x806e816e,
|
||||
0x8e6e866e, 0x80786e78,
|
||||
0x80f8c078, 0xb8ef1605,
|
||||
0x806f816f, 0x8e6f846f,
|
||||
0x8e76826f, 0xbef600ff,
|
||||
0x01000000, 0xbefc006f,
|
||||
0xc031003a, 0x00000078,
|
||||
0x80f8c078, 0xbf8cc07f,
|
||||
0x80fc907c, 0xbf800000,
|
||||
0xbe802d00, 0xbe822d02,
|
||||
0xbe842d04, 0xbe862d06,
|
||||
0xbe882d08, 0xbe8a2d0a,
|
||||
0xbe8c2d0c, 0xbe8e2d0e,
|
||||
0xbf06807c, 0xbf84fff0,
|
||||
0x8078ff78, 0x00000080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0080, 0xe0510000,
|
||||
0x781d0000, 0xe0510100,
|
||||
0x781d0000, 0x807cff7c,
|
||||
0x00000200, 0x8078ff78,
|
||||
0x00000200, 0xbf0a6f7c,
|
||||
0xbf85fff6, 0xbef80080,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8ef2a05, 0x806f816f,
|
||||
0x8e6f826f, 0x8e76886f,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbeee0078, 0x8078ff78,
|
||||
0x00000400, 0xbefc0084,
|
||||
0xbf11087c, 0x806fff6f,
|
||||
0x00008000, 0xe0524000,
|
||||
0x781d0000, 0xe0524100,
|
||||
0x781d0100, 0xe0524200,
|
||||
0x781d0200, 0xe0524300,
|
||||
0x781d0300, 0xbf8c0f70,
|
||||
0x7e000300, 0x7e020301,
|
||||
0x7e040302, 0x7e060303,
|
||||
0x807c847c, 0x8078ff78,
|
||||
0x00000400, 0xbf0a6f7c,
|
||||
0xbf85ffee, 0xbf9c0000,
|
||||
0xe0524000, 0x6e1d0000,
|
||||
0xe0524100, 0x6e1d0100,
|
||||
0xe0524200, 0x6e1d0200,
|
||||
0xe0524300, 0x6e1d0300,
|
||||
0xb8f82a05, 0x80788178,
|
||||
0x8e788a78, 0xb8ee1605,
|
||||
0x806e816e, 0x8e6e866e,
|
||||
0x80786e78, 0xbef60084,
|
||||
0x80786e78, 0x80f8c078,
|
||||
0xb8ef1605, 0x806f816f,
|
||||
0x8e6f846f, 0x8e76826f,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xc0211bfa, 0x00000078,
|
||||
0x80788478, 0xc0211b3a,
|
||||
0xbefc006f, 0xc031003a,
|
||||
0x00000078, 0x80f8c078,
|
||||
0xbf8cc07f, 0x80fc907c,
|
||||
0xbf800000, 0xbe802d00,
|
||||
0xbe822d02, 0xbe842d04,
|
||||
0xbe862d06, 0xbe882d08,
|
||||
0xbe8a2d0a, 0xbe8c2d0c,
|
||||
0xbe8e2d0e, 0xbf06807c,
|
||||
0xbf84fff0, 0xb8f82a05,
|
||||
0x80788178, 0x8e788a78,
|
||||
0xb8ee1605, 0x806e816e,
|
||||
0x8e6e866e, 0x80786e78,
|
||||
0xbef60084, 0xbef600ff,
|
||||
0x01000000, 0xc0211bfa,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211b7a, 0x00000078,
|
||||
0x80788478, 0xc0211eba,
|
||||
0xc0211b3a, 0x00000078,
|
||||
0x80788478, 0xc0211b7a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211efa, 0x00000078,
|
||||
0x80788478, 0xc0211c3a,
|
||||
0xc0211c3a, 0x00000078,
|
||||
0x80788478, 0xc0211c7a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211c7a, 0x00000078,
|
||||
0x80788478, 0xc0211a3a,
|
||||
0xc0211eba, 0x00000078,
|
||||
0x80788478, 0xc0211efa,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211a7a, 0x00000078,
|
||||
0x80788478, 0xc0211cfa,
|
||||
0xc0211a3a, 0x00000078,
|
||||
0x80788478, 0xc0211a7a,
|
||||
0x00000078, 0x80788478,
|
||||
0xbf8cc07f, 0xbefc006f,
|
||||
0xbefe007a, 0xbeff007b,
|
||||
0x866f71ff, 0x000003ff,
|
||||
0xb96f4803, 0x866f71ff,
|
||||
0xfffff800, 0x8f6f8b6f,
|
||||
0xb96fa2c3, 0xb973f801,
|
||||
0xb8ee2a05, 0x806e816e,
|
||||
0x8e6e8a6e, 0xb8ef1605,
|
||||
0x806f816f, 0x8e6f866f,
|
||||
0x806e6f6e, 0x806e746e,
|
||||
0x826f8075, 0x866fff6f,
|
||||
0x0000ffff, 0xc0071cb7,
|
||||
0x00000040, 0xc00b1d37,
|
||||
0x00000048, 0xc0031e77,
|
||||
0x00000058, 0xc0071eb7,
|
||||
0x0000005c, 0xbf8cc07f,
|
||||
0x866fff6d, 0xf0000000,
|
||||
0x8f6f9c6f, 0x8e6f906f,
|
||||
0xbeee0080, 0x876e6f6e,
|
||||
0x866fff6d, 0x08000000,
|
||||
0x8f6f9b6f, 0x8e6f8f6f,
|
||||
0x876e6f6e, 0x866fff70,
|
||||
0x00800000, 0x8f6f976f,
|
||||
0xb96ef807, 0x866dff6d,
|
||||
0x0000ffff, 0x86fe7e7e,
|
||||
0x86ea6a6a, 0x8f6e8370,
|
||||
0xb96ee0c2, 0xbf800002,
|
||||
0xb9700002, 0xbf8a0000,
|
||||
0x95806f6c, 0xbf810000,
|
||||
0xc0211cfa, 0x00000078,
|
||||
0x80788478, 0xbf8cc07f,
|
||||
0xbefc006f, 0xbefe0070,
|
||||
0xbeff0071, 0x866f7bff,
|
||||
0x000003ff, 0xb96f4803,
|
||||
0x866f7bff, 0xfffff800,
|
||||
0x8f6f8b6f, 0xb96fa2c3,
|
||||
0xb973f801, 0xb8ee2a05,
|
||||
0x806e816e, 0x8e6e8a6e,
|
||||
0xb8ef1605, 0x806f816f,
|
||||
0x8e6f866f, 0x806e6f6e,
|
||||
0x806e746e, 0x826f8075,
|
||||
0x866fff6f, 0x0000ffff,
|
||||
0xc00b1c37, 0x00000050,
|
||||
0xc00b1d37, 0x00000060,
|
||||
0xc0031e77, 0x00000074,
|
||||
0xbf8cc07f, 0x866fff6d,
|
||||
0xf8000000, 0x8f6f9b6f,
|
||||
0x8e6f906f, 0xbeee0080,
|
||||
0x876e6f6e, 0x866fff6d,
|
||||
0x04000000, 0x8f6f9a6f,
|
||||
0x8e6f8f6f, 0x876e6f6e,
|
||||
0x866fff7a, 0x00800000,
|
||||
0x8f6f976f, 0xb96ef807,
|
||||
0x866dff6d, 0x0000ffff,
|
||||
0x86fe7e7e, 0x86ea6a6a,
|
||||
0x8f6e837a, 0xb96ee0c2,
|
||||
0xbf800002, 0xb97a0002,
|
||||
0xbf8a0000, 0x95806f6c,
|
||||
0xbf810000, 0x00000000,
|
||||
};
|
||||
|
@@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
|
||||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
end
|
||||
|
||||
//check whether there is mem_viol
|
||||
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
|
||||
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
|
||||
s_cbranch_scc0 L_NO_PC_REWIND
|
||||
|
||||
//if so, need rewind PC assuming GDS operation gets NACKed
|
||||
s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
|
||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
|
||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
||||
s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
|
||||
s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
|
||||
|
||||
L_NO_PC_REWIND:
|
||||
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
|
||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
|
||||
|
||||
|
@@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
|
||||
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
|
||||
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
|
||||
|
||||
var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
|
||||
var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
|
||||
var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
|
||||
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
|
||||
|
||||
var s_save_spi_init_lo = exec_lo
|
||||
var s_save_spi_init_hi = exec_hi
|
||||
@@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
|
||||
var s_save_pc_hi = ttmp1
|
||||
var s_save_exec_lo = ttmp2
|
||||
var s_save_exec_hi = ttmp3
|
||||
var s_save_tmp = ttmp4
|
||||
var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
|
||||
var s_save_tmp = ttmp14
|
||||
var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
|
||||
var s_save_xnack_mask_lo = ttmp6
|
||||
var s_save_xnack_mask_hi = ttmp7
|
||||
var s_save_buf_rsrc0 = ttmp8
|
||||
@@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
|
||||
var s_save_buf_rsrc2 = ttmp10
|
||||
var s_save_buf_rsrc3 = ttmp11
|
||||
var s_save_status = ttmp12
|
||||
var s_save_mem_offset = ttmp14
|
||||
var s_save_mem_offset = ttmp4
|
||||
var s_save_alloc_size = s_save_trapsts //conflict
|
||||
var s_save_m0 = ttmp15
|
||||
var s_save_m0 = ttmp5
|
||||
var s_save_ttmps_lo = s_save_tmp //no conflict
|
||||
var s_save_ttmps_hi = s_save_trapsts //no conflict
|
||||
|
||||
@@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
|
||||
|
||||
var s_restore_pc_lo = ttmp0
|
||||
var s_restore_pc_hi = ttmp1
|
||||
var s_restore_exec_lo = ttmp14
|
||||
var s_restore_exec_hi = ttmp15
|
||||
var s_restore_status = ttmp4
|
||||
var s_restore_trapsts = ttmp5
|
||||
var s_restore_exec_lo = ttmp4
|
||||
var s_restore_exec_hi = ttmp5
|
||||
var s_restore_status = ttmp14
|
||||
var s_restore_trapsts = ttmp15
|
||||
var s_restore_xnack_mask_lo = xnack_mask_lo
|
||||
var s_restore_xnack_mask_hi = xnack_mask_hi
|
||||
var s_restore_buf_rsrc0 = ttmp8
|
||||
@@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
|
||||
|
||||
L_HALT_WAVE:
|
||||
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
|
||||
// We cannot prevent further faults so just terminate the wavefront.
|
||||
// We cannot prevent further faults. Spin wait until context saved.
|
||||
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||
s_cbranch_scc0 L_NOT_ALREADY_HALTED
|
||||
s_endpgm
|
||||
|
||||
L_WAIT_CTX_SAVE:
|
||||
s_sleep 0x10
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
|
||||
s_cbranch_scc0 L_WAIT_CTX_SAVE
|
||||
|
||||
L_NOT_ALREADY_HALTED:
|
||||
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||
|
||||
@@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
|
||||
// Read second-level TBA/TMA from first-level TMA and jump if available.
|
||||
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
|
||||
// ttmp12 holds SQ_WAVE_STATUS
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
|
||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
|
||||
s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
|
||||
s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
|
||||
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
|
||||
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
|
||||
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
|
||||
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
|
||||
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
|
||||
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
|
||||
@@ -405,7 +411,7 @@ end
|
||||
else
|
||||
end
|
||||
|
||||
// Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
|
||||
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||
get_vgpr_size_bytes(s_save_ttmps_lo)
|
||||
get_sgpr_size_bytes(s_save_ttmps_hi)
|
||||
@@ -413,13 +419,11 @@ end
|
||||
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
|
||||
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
|
||||
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
|
||||
s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
|
||||
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
|
||||
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
|
||||
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
|
||||
/* setup Resource Constants */
|
||||
@@ -1093,7 +1097,7 @@ end
|
||||
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
|
||||
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
|
||||
|
||||
// Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
|
||||
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||
get_vgpr_size_bytes(s_restore_ttmps_lo)
|
||||
get_sgpr_size_bytes(s_restore_ttmps_hi)
|
||||
@@ -1101,10 +1105,9 @@ end
|
||||
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
|
||||
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
|
||||
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
|
||||
s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
|
||||
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
|
||||
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
|
||||
s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
|
||||
s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
|
||||
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
|
||||
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
//reuse s_restore_m0 as a temp register
|
||||
|
@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
|
||||
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
|
||||
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
|
||||
q_properties->type = KFD_QUEUE_TYPE_SDMA;
|
||||
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
|
||||
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
|
||||
else
|
||||
return -ENOTSUPP;
|
||||
|
||||
@@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
|
||||
struct kfd_process_device *pdd;
|
||||
|
||||
dev = kfd_device_by_id(args->gpu_id);
|
||||
if (dev == NULL)
|
||||
if (!dev)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
@@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||
if (args->size != kfd_doorbell_process_slice(dev))
|
||||
return -EINVAL;
|
||||
offset = kfd_get_process_doorbells(dev, p);
|
||||
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||
if (args->size != PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||
if (!offset)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
@@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
||||
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
|
||||
args->mmap_offset = offset;
|
||||
|
||||
/* MMIO is mapped through kfd device
|
||||
* Generate a kfd mmap offset
|
||||
*/
|
||||
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
|
||||
args->mmap_offset <<= PAGE_SHIFT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_free:
|
||||
@@ -1551,6 +1567,32 @@ copy_from_user_failed:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_alloc_queue_gws(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_ioctl_alloc_queue_gws_args *args = data;
|
||||
struct kfd_dev *dev;
|
||||
|
||||
if (!hws_gws_support)
|
||||
return -EINVAL;
|
||||
|
||||
dev = kfd_device_by_id(args->gpu_id);
|
||||
if (!dev) {
|
||||
pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
|
||||
mutex_unlock(&p->mutex);
|
||||
|
||||
args->first_gws = 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
@@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
|
||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
|
||||
kfd_ioctl_import_dmabuf, 0),
|
||||
|
||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
|
||||
kfd_ioctl_alloc_queue_gws, 0),
|
||||
};
|
||||
|
||||
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
|
||||
@@ -1845,6 +1889,39 @@ err_i1:
|
||||
return retcode;
|
||||
}
|
||||
|
||||
static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
phys_addr_t address;
|
||||
int ret;
|
||||
|
||||
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||
|
||||
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
|
||||
VM_DONTDUMP | VM_PFNMAP;
|
||||
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
|
||||
pr_debug("Process %d mapping mmio page\n"
|
||||
" target user address == 0x%08llX\n"
|
||||
" physical address == 0x%08llX\n"
|
||||
" vm_flags == 0x%04lX\n"
|
||||
" size == 0x%04lX\n",
|
||||
process->pasid, (unsigned long long) vma->vm_start,
|
||||
address, vma->vm_flags, PAGE_SIZE);
|
||||
|
||||
ret = io_remap_pfn_range(vma,
|
||||
vma->vm_start,
|
||||
address >> PAGE_SHIFT,
|
||||
PAGE_SIZE,
|
||||
vma->vm_page_prot);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
struct kfd_process *process;
|
||||
@@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
return kfd_reserved_mem_mmap(dev, process, vma);
|
||||
case KFD_MMAP_TYPE_MMIO:
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
return kfd_mmio_mmap(dev, process, vma);
|
||||
}
|
||||
|
||||
return -EFAULT;
|
||||
|
@@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
|
||||
#define polaris10_cache_info carrizo_cache_info
|
||||
#define polaris11_cache_info carrizo_cache_info
|
||||
#define polaris12_cache_info carrizo_cache_info
|
||||
#define vegam_cache_info carrizo_cache_info
|
||||
/* TODO - check & update Vega10 cache details */
|
||||
#define vega10_cache_info carrizo_cache_info
|
||||
#define raven_cache_info carrizo_cache_info
|
||||
@@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
|
||||
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
|
||||
props->weight = 20;
|
||||
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
|
||||
props->weight = 15;
|
||||
props->weight = 15 * iolink->num_hops_xgmi;
|
||||
else
|
||||
props->weight = node_distance(id_from, id_to);
|
||||
|
||||
@@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
||||
pcache_info = polaris12_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
|
||||
break;
|
||||
case CHIP_VEGAM:
|
||||
pcache_info = vegam_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
@@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
|
||||
|
||||
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
||||
struct kfd_dev *kdev,
|
||||
struct kfd_dev *peer_kdev,
|
||||
struct crat_subtype_iolink *sub_type_hdr,
|
||||
uint32_t proximity_domain_from,
|
||||
uint32_t proximity_domain_to)
|
||||
@@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
||||
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
|
||||
sub_type_hdr->proximity_domain_from = proximity_domain_from;
|
||||
sub_type_hdr->proximity_domain_to = proximity_domain_to;
|
||||
sub_type_hdr->num_hops_xgmi =
|
||||
amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
||||
(char *)sub_type_hdr +
|
||||
sizeof(struct crat_subtype_iolink));
|
||||
ret = kfd_fill_gpu_xgmi_link_to_gpu(
|
||||
&avail_size, kdev,
|
||||
&avail_size, kdev, peer_dev->gpu,
|
||||
(struct crat_subtype_iolink *)sub_type_hdr,
|
||||
proximity_domain, nid);
|
||||
if (ret < 0)
|
||||
|
@@ -274,7 +274,8 @@ struct crat_subtype_iolink {
|
||||
uint32_t minimum_bandwidth_mbs;
|
||||
uint32_t maximum_bandwidth_mbs;
|
||||
uint32_t recommended_transfer_size;
|
||||
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH];
|
||||
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
|
||||
uint8_t num_hops_xgmi;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
|
||||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 1,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
#endif
|
||||
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info vegam_device_info = {
|
||||
.asic_family = CHIP_VEGAM,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.doorbell_size = 4,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
@@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 8,
|
||||
};
|
||||
|
||||
@@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
|
||||
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
|
||||
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
|
||||
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
|
||||
{ 0x694C, &vegam_device_info }, /* VegaM */
|
||||
{ 0x694E, &vegam_device_info }, /* VegaM */
|
||||
{ 0x694F, &vegam_device_info }, /* VegaM */
|
||||
{ 0x6860, &vega10_device_info }, /* Vega10 */
|
||||
{ 0x6861, &vega10_device_info }, /* Vega10 */
|
||||
{ 0x6862, &vega10_device_info }, /* Vega10 */
|
||||
@@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
} else
|
||||
kfd->max_proc_per_quantum = hws_max_conc_proc;
|
||||
|
||||
/* Allocate global GWS that is shared by all KFD processes */
|
||||
if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
|
||||
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
|
||||
dev_err(kfd_device, "Could not allocate %d gws\n",
|
||||
amdgpu_amdkfd_get_num_gws(kfd->kgd));
|
||||
goto out;
|
||||
}
|
||||
/* calculate max size of mqds needed for queues */
|
||||
size = max_num_of_queues_per_device *
|
||||
kfd->device_info->mqd_size_aligned;
|
||||
@@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
||||
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
|
||||
false)) {
|
||||
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
|
||||
goto out;
|
||||
goto alloc_gtt_mem_failure;
|
||||
}
|
||||
|
||||
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
|
||||
@@ -611,6 +653,9 @@ kfd_doorbell_error:
|
||||
kfd_gtt_sa_fini(kfd);
|
||||
kfd_gtt_sa_init_error:
|
||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
alloc_gtt_mem_failure:
|
||||
if (hws_gws_support)
|
||||
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
|
||||
dev_err(kfd_device,
|
||||
"device %x:%x NOT added due to errors\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
@@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
||||
kfd_doorbell_fini(kfd);
|
||||
kfd_gtt_sa_fini(kfd);
|
||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
if (hws_gws_support)
|
||||
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
|
||||
}
|
||||
|
||||
kfree(kfd);
|
||||
|
@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id);
|
||||
struct queue *q);
|
||||
|
||||
static void kfd_process_hw_exception(struct work_struct *work);
|
||||
|
||||
static inline
|
||||
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
|
||||
{
|
||||
if (type == KFD_QUEUE_TYPE_SDMA)
|
||||
if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
return KFD_MQD_TYPE_SDMA;
|
||||
return KFD_MQD_TYPE_CP;
|
||||
}
|
||||
@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
|
||||
return dqm->dev->device_info->num_sdma_engines;
|
||||
}
|
||||
|
||||
static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
|
||||
{
|
||||
return dqm->dev->device_info->num_xgmi_sdma_engines;
|
||||
}
|
||||
|
||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
return dqm->dev->device_info->num_sdma_engines
|
||||
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
||||
}
|
||||
|
||||
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
return dqm->dev->device_info->num_xgmi_sdma_engines
|
||||
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
||||
}
|
||||
|
||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
@@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
|
||||
* preserve the user mode ABI.
|
||||
*/
|
||||
q->doorbell_id = q->properties.queue_id;
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
|
||||
* doorbell assignments based on the engine and queue id.
|
||||
* The doorbell index distance between RLC (2*i) and (2*i+1)
|
||||
@@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
|
||||
struct kfd_dev *dev = qpd->dqm->dev;
|
||||
|
||||
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
return;
|
||||
|
||||
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
|
||||
@@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
||||
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
|
||||
else
|
||||
retval = -EINVAL;
|
||||
@@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count++;
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
dqm->xgmi_sdma_queue_count++;
|
||||
|
||||
/*
|
||||
* Unconditionally increment this counter, regardless of the queue's
|
||||
@@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct mqd_manager *mqd_mgr;
|
||||
int retval;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
|
||||
|
||||
retval = allocate_hqd(dqm, q);
|
||||
if (retval)
|
||||
@@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
|
||||
int retval;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
|
||||
deallocate_hqd(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
dqm->xgmi_sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else {
|
||||
pr_debug("q->properties.type %d is invalid\n",
|
||||
q->properties.type);
|
||||
@@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
retval = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
@@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
}
|
||||
} else if (prev_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
||||
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
||||
@@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
||||
retval = map_queues_cpsch(dqm);
|
||||
else if (q->properties.is_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||
if (WARN(q->process->mm != current->mm,
|
||||
"should only run in user thread"))
|
||||
retval = -EFAULT;
|
||||
@@ -571,27 +584,6 @@ out_unlock:
|
||||
return retval;
|
||||
}
|
||||
|
||||
static struct mqd_manager *get_mqd_manager(
|
||||
struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
|
||||
{
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
|
||||
return NULL;
|
||||
|
||||
pr_debug("mqd type %d\n", type);
|
||||
|
||||
mqd_mgr = dqm->mqd_mgrs[type];
|
||||
if (!mqd_mgr) {
|
||||
mqd_mgr = mqd_manager_init(type, dqm->dev);
|
||||
if (!mqd_mgr)
|
||||
pr_err("mqd manager is NULL");
|
||||
dqm->mqd_mgrs[type] = mqd_mgr;
|
||||
}
|
||||
|
||||
return mqd_mgr;
|
||||
}
|
||||
|
||||
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
@@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_active)
|
||||
continue;
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) { /* should not be here */
|
||||
pr_err("Cannot evict queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
q->properties.is_evicted = true;
|
||||
q->properties.is_active = false;
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
@@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_evicted)
|
||||
continue;
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) { /* should not be here */
|
||||
pr_err("Cannot restore queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
q->properties.is_evicted = false;
|
||||
q->properties.is_active = true;
|
||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
|
||||
@@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
|
||||
retval = dqm->asic_ops.update_qpd(dqm, qpd);
|
||||
|
||||
dqm->processes_count++;
|
||||
kfd_inc_compute_active(dqm->dev);
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
kfd_inc_compute_active(dqm->dev);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
|
||||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
@@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
|
||||
retval = 1;
|
||||
out:
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
if (!retval)
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->xgmi_sdma_queue_count = 0;
|
||||
|
||||
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
|
||||
int pipe_offset = pipe * get_queues_per_pipe(dqm);
|
||||
@@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
||||
}
|
||||
|
||||
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
|
||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
|
||||
}
|
||||
|
||||
static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int *sdma_queue_id)
|
||||
struct queue *q)
|
||||
{
|
||||
int bit;
|
||||
|
||||
if (dqm->sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
if (dqm->sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
bit = __ffs64(dqm->sdma_bitmap);
|
||||
dqm->sdma_bitmap &= ~(1ULL << bit);
|
||||
q->sdma_id = bit;
|
||||
q->properties.sdma_engine_id = q->sdma_id %
|
||||
get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_queue_id = q->sdma_id /
|
||||
get_num_sdma_engines(dqm);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
if (dqm->xgmi_sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
bit = __ffs64(dqm->xgmi_sdma_bitmap);
|
||||
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
|
||||
q->sdma_id = bit;
|
||||
/* sdma_engine_id is sdma id including
|
||||
* both PCIe-optimized SDMAs and XGMI-
|
||||
* optimized SDMAs. The calculation below
|
||||
* assumes the first N engines are always
|
||||
* PCIe-optimized ones
|
||||
*/
|
||||
q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
|
||||
q->sdma_id % get_num_xgmi_sdma_engines(dqm);
|
||||
q->properties.sdma_queue_id = q->sdma_id /
|
||||
get_num_xgmi_sdma_engines(dqm);
|
||||
}
|
||||
|
||||
bit = ffs(dqm->sdma_bitmap) - 1;
|
||||
dqm->sdma_bitmap &= ~(1 << bit);
|
||||
*sdma_queue_id = bit;
|
||||
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
||||
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id)
|
||||
struct queue *q)
|
||||
{
|
||||
if (sdma_queue_id >= get_num_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->sdma_bitmap |= (1 << sdma_queue_id);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
if (q->sdma_id >= get_num_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
|
||||
}
|
||||
}
|
||||
|
||||
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
@@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
struct mqd_manager *mqd_mgr;
|
||||
int retval;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
|
||||
|
||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
||||
retval = allocate_sdma_queue(dqm, q);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
|
||||
|
||||
retval = allocate_doorbell(qpd, q);
|
||||
if (retval)
|
||||
goto out_deallocate_sdma_queue;
|
||||
|
||||
pr_debug("SDMA id is: %d\n", q->sdma_id);
|
||||
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
|
||||
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
||||
|
||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
&q->gart_mqd_addr, &q->properties);
|
||||
@@ -987,7 +1002,7 @@ out_uninit_mqd:
|
||||
out_deallocate_doorbell:
|
||||
deallocate_doorbell(qpd, q);
|
||||
out_deallocate_sdma_queue:
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
|
||||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->processes_count = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->xgmi_sdma_queue_count = 0;
|
||||
dqm->active_runlist = false;
|
||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||
|
||||
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
|
||||
|
||||
@@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
int retval;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
retval = 0;
|
||||
|
||||
dqm_lock(dqm);
|
||||
|
||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||
pr_warn("Can't create new usermode queue because %d queues were already created\n",
|
||||
dqm->total_queue_count);
|
||||
retval = -EPERM;
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
retval = allocate_sdma_queue(dqm, q);
|
||||
if (retval)
|
||||
goto out_unlock;
|
||||
q->properties.sdma_queue_id =
|
||||
q->sdma_id / get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_engine_id =
|
||||
q->sdma_id % get_num_sdma_engines(dqm);
|
||||
goto out;
|
||||
}
|
||||
|
||||
retval = allocate_doorbell(qpd, q);
|
||||
if (retval)
|
||||
goto out_deallocate_sdma_queue;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out_deallocate_doorbell;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
@@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
||||
q->properties.queue_percent > 0 &&
|
||||
q->properties.queue_address != 0);
|
||||
|
||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||
|
||||
q->properties.tba_addr = qpd->tba_addr;
|
||||
q->properties.tma_addr = qpd->tma_addr;
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
@@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
if (retval)
|
||||
goto out_deallocate_doorbell;
|
||||
|
||||
dqm_lock(dqm);
|
||||
|
||||
list_add(&q->list, &qpd->queues_list);
|
||||
qpd->queue_count++;
|
||||
if (q->properties.is_active) {
|
||||
@@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count++;
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
dqm->xgmi_sdma_queue_count++;
|
||||
/*
|
||||
* Unconditionally increment this counter, regardless of the queue's
|
||||
* type or whether the queue is active.
|
||||
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||
out_deallocate_doorbell:
|
||||
deallocate_doorbell(qpd, q);
|
||||
out_deallocate_sdma_queue:
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
out_unlock:
|
||||
dqm_unlock(dqm);
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int unmap_sdma_queues(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_engine)
|
||||
static int unmap_sdma_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
|
||||
sdma_engine);
|
||||
int i, retval = 0;
|
||||
|
||||
for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
|
||||
dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
|
||||
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
|
||||
if (retval)
|
||||
return retval;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* dqm->lock mutex has to be locked before calling this function */
|
||||
@@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
|
||||
if (!dqm->active_runlist)
|
||||
return retval;
|
||||
|
||||
pr_debug("Before destroying queues, sdma queue count is : %u\n",
|
||||
dqm->sdma_queue_count);
|
||||
pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
|
||||
dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
|
||||
|
||||
if (dqm->sdma_queue_count > 0) {
|
||||
unmap_sdma_queues(dqm, 0);
|
||||
unmap_sdma_queues(dqm, 1);
|
||||
}
|
||||
if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
|
||||
unmap_sdma_queues(dqm);
|
||||
|
||||
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
|
||||
filter, filter_param, false, 0);
|
||||
@@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
|
||||
}
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
|
||||
deallocate_doorbell(qpd, q);
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
dqm->xgmi_sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
}
|
||||
|
||||
list_del(&q->list);
|
||||
@@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
qpd->reset_wavefronts = true;
|
||||
}
|
||||
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
/*
|
||||
* Unconditionally decrement this counter, regardless of the queue's
|
||||
* type
|
||||
@@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
return retval;
|
||||
|
||||
failed:
|
||||
failed_try_destroy_debugged_queue:
|
||||
|
||||
dqm_unlock(dqm);
|
||||
@@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||
struct queue *q, *next;
|
||||
struct device_process_node *cur, *next_dpn;
|
||||
int retval = 0;
|
||||
bool found = false;
|
||||
|
||||
dqm_lock(dqm);
|
||||
|
||||
@@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
if (found)
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
|
||||
goto dqm_unlock;
|
||||
}
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (!mqd_mgr) {
|
||||
r = -ENOMEM;
|
||||
goto dqm_unlock;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
|
||||
|
||||
if (!mqd_mgr->get_wave_state) {
|
||||
r = -EINVAL;
|
||||
@@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
struct device_process_node *cur, *next_dpn;
|
||||
enum kfd_unmap_queues_filter filter =
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
|
||||
bool found = false;
|
||||
|
||||
retval = 0;
|
||||
|
||||
@@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
dqm->xgmi_sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
}
|
||||
|
||||
if (q->properties.is_active)
|
||||
@@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
||||
qpd->reset_wavefronts = false;
|
||||
}
|
||||
|
||||
/* lastly, free mqd resources */
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
if (found)
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
|
||||
/* Lastly, free mqd resources.
|
||||
* Do uninit_mqd() after dqm_unlock to avoid circular locking.
|
||||
*/
|
||||
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
list_del(&q->list);
|
||||
qpd->queue_count--;
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
}
|
||||
|
||||
out:
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int init_mqd_managers(struct device_queue_manager *dqm)
|
||||
{
|
||||
int i, j;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
|
||||
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
|
||||
if (!mqd_mgr) {
|
||||
pr_err("mqd manager [%d] initialization failed\n", i);
|
||||
goto out_free;
|
||||
}
|
||||
dqm->mqd_mgrs[i] = mqd_mgr;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
for (j = 0; j < i; j++) {
|
||||
kfree(dqm->mqd_mgrs[j]);
|
||||
dqm->mqd_mgrs[j] = NULL;
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
|
||||
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_dev *dev = dqm->dev;
|
||||
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
|
||||
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
|
||||
dev->device_info->num_sdma_engines *
|
||||
dev->device_info->num_sdma_queues_per_engine +
|
||||
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
|
||||
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
|
||||
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
|
||||
(void *)&(mem_obj->cpu_ptr), true);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||
dqm->ops.stop = stop_cpsch;
|
||||
dqm->ops.destroy_queue = destroy_queue_cpsch;
|
||||
dqm->ops.update_queue = update_queue;
|
||||
dqm->ops.get_mqd_manager = get_mqd_manager;
|
||||
dqm->ops.register_process = register_process;
|
||||
dqm->ops.unregister_process = unregister_process;
|
||||
dqm->ops.uninitialize = uninitialize;
|
||||
@@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||
dqm->ops.create_queue = create_queue_nocpsch;
|
||||
dqm->ops.destroy_queue = destroy_queue_nocpsch;
|
||||
dqm->ops.update_queue = update_queue;
|
||||
dqm->ops.get_mqd_manager = get_mqd_manager;
|
||||
dqm->ops.register_process = register_process;
|
||||
dqm->ops.unregister_process = unregister_process;
|
||||
dqm->ops.initialize = initialize_nocpsch;
|
||||
@@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
|
||||
break;
|
||||
|
||||
@@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (init_mqd_managers(dqm))
|
||||
goto out_free;
|
||||
|
||||
if (allocate_hiq_sdma_mqd(dqm)) {
|
||||
pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (!dqm->ops.initialize(dqm))
|
||||
return dqm;
|
||||
|
||||
@@ -1772,9 +1843,17 @@ out_free:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
|
||||
{
|
||||
WARN(!mqd, "No hiq sdma mqd trunk to free");
|
||||
|
||||
amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
|
||||
}
|
||||
|
||||
/*
 * Tear down a device queue manager: run its uninitialize op, release the
 * HIQ/SDMA MQD trunk buffer embedded in it, then free the manager itself.
 * The dqm pointer must not be used after this returns.
 */
void device_queue_manager_uninit(struct device_queue_manager *dqm)
|
||||
{
|
||||
dqm->ops.uninitialize(dqm);
|
||||
deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
|
||||
kfree(dqm);
|
||||
}
|
||||
|
||||
|
@@ -48,8 +48,6 @@ struct device_process_node {
|
||||
*
|
||||
* @update_queue: Queue update routine.
|
||||
*
|
||||
* @get_mqd_manager: Returns the mqd manager according to the mqd type.
|
||||
*
|
||||
* @execute_queues: Dispatches the queues list to the H/W.
|
||||
*
|
||||
* @register_process: This routine associates a specific process with device.
|
||||
@@ -97,10 +95,6 @@ struct device_queue_manager_ops {
|
||||
int (*update_queue)(struct device_queue_manager *dqm,
|
||||
struct queue *q);
|
||||
|
||||
struct mqd_manager * (*get_mqd_manager)
|
||||
(struct device_queue_manager *dqm,
|
||||
enum KFD_MQD_TYPE type);
|
||||
|
||||
int (*register_process)(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
@@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
|
||||
void (*init_sdma_vm)(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -185,10 +181,12 @@ struct device_queue_manager {
|
||||
unsigned int processes_count;
|
||||
unsigned int queue_count;
|
||||
unsigned int sdma_queue_count;
|
||||
unsigned int xgmi_sdma_queue_count;
|
||||
unsigned int total_queue_count;
|
||||
unsigned int next_pipe_to_allocate;
|
||||
unsigned int *allocated_queues;
|
||||
unsigned int sdma_bitmap;
|
||||
uint64_t sdma_bitmap;
|
||||
uint64_t xgmi_sdma_bitmap;
|
||||
unsigned int vmid_bitmap;
|
||||
uint64_t pipelines_addr;
|
||||
struct kfd_mem_obj *pipeline_mem;
|
||||
@@ -201,6 +199,7 @@ struct device_queue_manager {
|
||||
/* hw exception */
|
||||
bool is_hws_hang;
|
||||
struct work_struct hw_exception_work;
|
||||
struct kfd_mem_obj hiq_sdma_mqd;
|
||||
};
|
||||
|
||||
void device_queue_manager_init_cik(
|
||||
@@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
|
||||
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
|
||||
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
|
||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
|
||||
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
|
||||
|
||||
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
||||
{
|
||||
|
@@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
|
||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||
asic_ops->update_qpd = update_qpd_cik;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_cik;
|
||||
}
|
||||
|
||||
void device_queue_manager_init_cik_hawaii(
|
||||
@@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
|
||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||
asic_ops->update_qpd = update_qpd_cik_hawaii;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
|
@@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
|
||||
{
|
||||
asic_ops->update_qpd = update_qpd_v9;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_v9;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_v9;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
|
||||
|
@@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
|
||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
|
||||
asic_ops->update_qpd = update_qpd_vi;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_vi;
|
||||
}
|
||||
|
||||
void device_queue_manager_init_vi_tonga(
|
||||
@@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
|
||||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
|
||||
asic_ops->update_qpd = update_qpd_vi_tonga;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
|
@@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
|
||||
return; /* Presumably process exited. */
|
||||
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
|
||||
memory_exception_data.gpu_id = dev->id;
|
||||
memory_exception_data.failure.imprecise = 1;
|
||||
memory_exception_data.failure.imprecise = true;
|
||||
/* Set failure reason */
|
||||
if (info) {
|
||||
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
|
||||
|
@@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
kfd_init_apertures_vi(pdd, id);
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
@@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
||||
kq->nop_packet = nop.u32all;
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_HIQ:
|
||||
kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
|
||||
KFD_MQD_TYPE_HIQ);
|
||||
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
|
||||
break;
|
||||
default:
|
||||
pr_err("Invalid queue type %d\n", type);
|
||||
@@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
kernel_queue_init_vi(&kq->ops_asic_specific);
|
||||
break;
|
||||
|
||||
|
@@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||
sizeof(struct pm4_mes_map_queues));
|
||||
packet->bitfields2.alloc_format =
|
||||
alloc_format__mes_map_queues__one_per_pipe_vi;
|
||||
packet->bitfields2.num_queues = 1;
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_map_queues__compute_vi;
|
||||
packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
|
||||
packet->bitfields2.queue_type =
|
||||
queue_type__mes_map_queues__normal_compute_vi;
|
||||
|
||||
@@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||
engine_sel__mes_map_queues__sdma0_vi;
|
||||
use_static = false; /* no static queues under SDMA */
|
||||
@@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
||||
engine_sel__mes_unmap_queues__compute;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||
break;
|
||||
|
@@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||
sizeof(struct pm4_mes_map_queues));
|
||||
packet->bitfields2.alloc_format =
|
||||
alloc_format__mes_map_queues__one_per_pipe_vi;
|
||||
packet->bitfields2.num_queues = 1;
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||
@@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||
engine_sel__mes_map_queues__sdma0_vi;
|
||||
use_static = false; /* no static queues under SDMA */
|
||||
@@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
||||
engine_sel__mes_unmap_queues__compute;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||
break;
|
||||
|
@@ -23,34 +23,54 @@
|
||||
|
||||
#include "kfd_mqd_manager.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
|
||||
{
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_KAVERI:
|
||||
return mqd_manager_init_cik(type, dev);
|
||||
case CHIP_HAWAII:
|
||||
return mqd_manager_init_cik_hawaii(type, dev);
|
||||
case CHIP_CARRIZO:
|
||||
return mqd_manager_init_vi(type, dev);
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
return mqd_manager_init_vi_tonga(type, dev);
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
return mqd_manager_init_v9(type, dev);
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
dev->device_info->asic_family);
|
||||
}
|
||||
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||
|
||||
return NULL;
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
|
||||
mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
|
||||
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
|
||||
mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||
uint64_t offset;
|
||||
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
|
||||
offset = (q->sdma_engine_id *
|
||||
dev->device_info->num_sdma_queues_per_engine +
|
||||
q->sdma_queue_id) *
|
||||
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
|
||||
|
||||
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
|
||||
|
||||
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
|
||||
+ offset);
|
||||
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
|
||||
mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
|
||||
dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
/*
 * uninit_mqd callback for HIQ and SDMA MQDs: warns if the descriptor has no
 * gtt_mem set, then frees only the descriptor object itself. No GTT memory
 * is released here. @mm and @mqd are unused.
 */
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
WARN_ON(!mqd_mem_obj->gtt_mem);
|
||||
kfree(mqd_mem_obj);
|
||||
}
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
|
@@ -99,8 +99,16 @@ struct mqd_manager {
|
||||
|
||||
struct mutex mqd_mutex;
|
||||
struct kfd_dev *dev;
|
||||
uint32_t mqd_size;
|
||||
};
|
||||
|
||||
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
|
||||
|
||||
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
|
||||
struct queue_properties *q);
|
||||
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj);
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
||||
uint32_t *se_mask);
|
||||
|
@@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct kfd_mem_obj *mqd_mem_obj;
|
||||
|
||||
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||
return allocate_hiq_mqd(kfd);
|
||||
|
||||
if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
|
||||
&mqd_mem_obj))
|
||||
return NULL;
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
uint64_t addr;
|
||||
struct cik_mqd *m;
|
||||
int retval;
|
||||
struct kfd_dev *kfd = mm->dev;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
@@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
{
|
||||
int retval;
|
||||
struct cik_sdma_rlc_registers *m;
|
||||
struct kfd_dev *dev = mm->dev;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
||||
sizeof(struct cik_sdma_rlc_registers),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
|
||||
@@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, struct queue_properties *p,
|
||||
@@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||
#endif
|
||||
|
@@ -67,6 +67,43 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||
|
||||
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||
return allocate_hiq_mqd(kfd);
|
||||
|
||||
/* From V9, for CWSR, the control stack is located on the next page
|
||||
* boundary after the mqd, we will use the gtt allocation function
|
||||
* instead of sub-allocation function.
|
||||
*/
|
||||
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
||||
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
|
||||
&(mqd_mem_obj->gtt_mem),
|
||||
&(mqd_mem_obj->gpu_addr),
|
||||
(void *)&(mqd_mem_obj->cpu_ptr), true);
|
||||
} else {
|
||||
retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
|
||||
&mqd_mem_obj);
|
||||
}
|
||||
|
||||
if (retval) {
|
||||
kfree(mqd_mem_obj);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mqd_mem_obj;
|
||||
|
||||
}
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@@ -76,24 +113,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct v9_mqd *m;
|
||||
struct kfd_dev *kfd = mm->dev;
|
||||
|
||||
/* From V9, for CWSR, the control stack is located on the next page
|
||||
* boundary after the mqd, we will use the gtt allocation function
|
||||
* instead of sub-allocation function.
|
||||
*/
|
||||
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
||||
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
||||
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
|
||||
&((*mqd_mem_obj)->gtt_mem),
|
||||
&((*mqd_mem_obj)->gpu_addr),
|
||||
(void *)&((*mqd_mem_obj)->cpu_ptr), true);
|
||||
} else
|
||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
|
||||
mqd_mem_obj);
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
@@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
{
|
||||
int retval;
|
||||
struct v9_sdma_mqd *m;
|
||||
struct kfd_dev *dev = mm->dev;
|
||||
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
||||
sizeof(struct v9_sdma_mqd),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
@@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
return retval;
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
struct queue_properties *p, struct mm_struct *mms)
|
||||
@@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->get_wave_state = get_wave_state;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||
#endif
|
||||
|
@@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct kfd_mem_obj *mqd_mem_obj;
|
||||
|
||||
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||
return allocate_hiq_mqd(kfd);
|
||||
|
||||
if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
|
||||
&mqd_mem_obj))
|
||||
return NULL;
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
int retval;
|
||||
uint64_t addr;
|
||||
struct vi_mqd *m;
|
||||
struct kfd_dev *kfd = mm->dev;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
|
||||
mqd_mem_obj);
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
@@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
{
|
||||
int retval;
|
||||
struct vi_sdma_mqd *m;
|
||||
struct kfd_dev *dev = mm->dev;
|
||||
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
||||
sizeof(struct vi_sdma_mqd),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
@@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
memset(m, 0, sizeof(struct vi_sdma_mqd));
|
||||
|
||||
*mqd = m;
|
||||
if (gart_addr != NULL)
|
||||
if (gart_addr)
|
||||
*gart_addr = (*mqd_mem_obj)->gpu_addr;
|
||||
|
||||
retval = mm->update_mqd(mm, m, q);
|
||||
@@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
||||
return retval;
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
struct queue_properties *p, struct mm_struct *mms)
|
||||
@@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->get_wave_state = get_wave_state;
|
||||
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||
#endif
|
||||
|
@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
|
||||
|
||||
process_count = pm->dqm->processes_count;
|
||||
queue_count = pm->dqm->queue_count;
|
||||
compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
|
||||
compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
|
||||
pm->dqm->xgmi_sdma_queue_count;
|
||||
|
||||
/* check if there is over subscription
|
||||
* Note: the arbitration between the number of VMIDs and
|
||||
@@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
pm->pmf = &kfd_vi_pm_funcs;
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
|
@@ -176,8 +176,7 @@ struct pm4_mes_map_process {
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_gws:6;
|
||||
uint32_t reserved7:1;
|
||||
uint32_t num_gws:7;
|
||||
uint32_t sdma_enable:1;
|
||||
uint32_t num_oac:4;
|
||||
uint32_t reserved8:4;
|
||||
@@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
|
||||
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
||||
};
|
||||
|
||||
enum mes_map_queues_alloc_format_enum {
|
||||
alloc_format__mes_map_queues__one_per_pipe_vi = 0,
|
||||
alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
|
||||
};
|
||||
|
||||
enum mes_map_queues_engine_sel_enum {
|
||||
engine_sel__mes_map_queues__compute_vi = 0,
|
||||
engine_sel__mes_map_queues__sdma0_vi = 2,
|
||||
@@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
|
||||
struct {
|
||||
uint32_t reserved1:4;
|
||||
enum mes_map_queues_queue_sel_enum queue_sel:2;
|
||||
uint32_t reserved2:15;
|
||||
uint32_t reserved5:6;
|
||||
uint32_t gws_control_queue:1;
|
||||
uint32_t reserved2:8;
|
||||
enum mes_map_queues_queue_type_enum queue_type:3;
|
||||
enum mes_map_queues_alloc_format_enum alloc_format:2;
|
||||
uint32_t reserved3:2;
|
||||
enum mes_map_queues_engine_sel_enum engine_sel:3;
|
||||
uint32_t num_queues:3;
|
||||
} bitfields2;
|
||||
|
@@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
|
||||
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
||||
};
|
||||
|
||||
enum mes_map_queues_alloc_format_vi_enum {
|
||||
alloc_format__mes_map_queues__one_per_pipe_vi = 0,
|
||||
alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
|
||||
};
|
||||
|
||||
enum mes_map_queues_engine_sel_vi_enum {
|
||||
engine_sel__mes_map_queues__compute_vi = 0,
|
||||
engine_sel__mes_map_queues__sdma0_vi = 2,
|
||||
@@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
|
||||
enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
|
||||
uint32_t reserved2:15;
|
||||
enum mes_map_queues_queue_type_vi_enum queue_type:3;
|
||||
enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
|
||||
uint32_t reserved3:2;
|
||||
enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
|
||||
uint32_t num_queues:3;
|
||||
} bitfields2;
|
||||
|
@@ -59,6 +59,7 @@
|
||||
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
|
||||
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
|
||||
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
|
||||
@@ -160,6 +161,11 @@ extern int noretry;
|
||||
*/
|
||||
extern int halt_if_hws_hang;
|
||||
|
||||
/*
|
||||
* Whether MEC FW supports GWS barriers
|
||||
*/
|
||||
extern bool hws_gws_support;
|
||||
|
||||
enum cache_policy {
|
||||
cache_policy_coherent,
|
||||
cache_policy_noncoherent
|
||||
@@ -188,6 +194,7 @@ struct kfd_device_info {
|
||||
bool needs_iommu_device;
|
||||
bool needs_pci_atomics;
|
||||
unsigned int num_sdma_engines;
|
||||
unsigned int num_xgmi_sdma_engines;
|
||||
unsigned int num_sdma_queues_per_engine;
|
||||
};
|
||||
|
||||
@@ -258,7 +265,7 @@ struct kfd_dev {
|
||||
bool interrupts_active;
|
||||
|
||||
/* Debug manager */
|
||||
struct kfd_dbgmgr *dbgmgr;
|
||||
struct kfd_dbgmgr *dbgmgr;
|
||||
|
||||
/* Firmware versions */
|
||||
uint16_t mec_fw_version;
|
||||
@@ -282,6 +289,9 @@ struct kfd_dev {
|
||||
|
||||
/* Compute Profile ref. count */
|
||||
atomic_t compute_profile;
|
||||
|
||||
/* Global GWS resource shared between processes */
|
||||
void *gws;
|
||||
};
|
||||
|
||||
enum kfd_mempool {
|
||||
@@ -329,7 +339,8 @@ enum kfd_queue_type {
|
||||
KFD_QUEUE_TYPE_COMPUTE,
|
||||
KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_QUEUE_TYPE_HIQ,
|
||||
KFD_QUEUE_TYPE_DIQ
|
||||
KFD_QUEUE_TYPE_DIQ,
|
||||
KFD_QUEUE_TYPE_SDMA_XGMI
|
||||
};
|
||||
|
||||
enum kfd_queue_format {
|
||||
@@ -444,6 +455,9 @@ struct queue_properties {
|
||||
*
|
||||
* @device: The kfd device that created this queue.
|
||||
*
|
||||
* @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
|
||||
* otherwise.
|
||||
*
|
||||
* This structure represents user mode compute queues.
|
||||
* It contains all the necessary data to handle such queues.
|
||||
*
|
||||
@@ -465,6 +479,7 @@ struct queue {
|
||||
|
||||
struct kfd_process *process;
|
||||
struct kfd_dev *device;
|
||||
void *gws;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
|
||||
KFD_MQD_TYPE_HIQ, /* for hiq */
|
||||
KFD_MQD_TYPE_CP, /* for cp queues and diq */
|
||||
KFD_MQD_TYPE_SDMA, /* for sdma queues */
|
||||
KFD_MQD_TYPE_DIQ, /* for diq */
|
||||
KFD_MQD_TYPE_MAX
|
||||
};
|
||||
|
||||
@@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
|
||||
void print_queue_properties(struct queue_properties *q);
|
||||
void print_queue(struct queue *q);
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
||||
@@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
|
||||
struct queue_properties *p);
|
||||
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
|
||||
struct queue_properties *p);
|
||||
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
|
||||
void *gws);
|
||||
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
|
||||
unsigned int qid);
|
||||
int pqm_get_wave_state(struct process_queue_manager *pqm,
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include "kfd_device_queue_manager.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_kernel_queue.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
static inline struct process_queue_node *get_queue_by_qid(
|
||||
struct process_queue_manager *pqm, unsigned int qid)
|
||||
@@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
|
||||
pdd->already_dequeued = true;
|
||||
}
|
||||
|
||||
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
|
||||
void *gws)
|
||||
{
|
||||
struct kfd_dev *dev = NULL;
|
||||
struct process_queue_node *pqn;
|
||||
struct kfd_process_device *pdd;
|
||||
struct kgd_mem *mem = NULL;
|
||||
int ret;
|
||||
|
||||
pqn = get_queue_by_qid(pqm, qid);
|
||||
if (!pqn) {
|
||||
pr_err("Queue id does not match any known queue\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (pqn->q)
|
||||
dev = pqn->q->device;
|
||||
if (WARN_ON(!dev))
|
||||
return -ENODEV;
|
||||
|
||||
pdd = kfd_get_process_device_data(dev, pqm->process);
|
||||
if (!pdd) {
|
||||
pr_err("Process device data doesn't exist\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Only allow one queue per process can have GWS assigned */
|
||||
if (gws && pdd->qpd.num_gws)
|
||||
return -EINVAL;
|
||||
|
||||
if (!gws && pdd->qpd.num_gws == 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (gws)
|
||||
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
|
||||
gws, &mem);
|
||||
else
|
||||
ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
|
||||
pqn->q->gws);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
pqn->q->gws = mem;
|
||||
pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
|
||||
|
||||
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
|
||||
pqn->q);
|
||||
}
|
||||
|
||||
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
|
||||
{
|
||||
struct kfd_process_device *pdd;
|
||||
@@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
|
||||
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
|
||||
pr_err("Over-subscription is not allowed for SDMA.\n");
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
|
||||
>= get_num_sdma_queues(dev->dqm)) ||
|
||||
(type == KFD_QUEUE_TYPE_SDMA_XGMI &&
|
||||
dev->dqm->xgmi_sdma_queue_count
|
||||
>= get_num_xgmi_sdma_queues(dev->dqm))) {
|
||||
pr_debug("Over-subscription is not allowed for SDMA.\n");
|
||||
retval = -EPERM;
|
||||
goto err_create_queue;
|
||||
}
|
||||
@@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
|
||||
if (retval != -ETIME)
|
||||
goto err_destroy_queue;
|
||||
}
|
||||
|
||||
if (pqn->q->gws) {
|
||||
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
|
||||
pqn->q->gws);
|
||||
pdd->qpd.num_gws = 0;
|
||||
}
|
||||
|
||||
kfree(pqn->q->properties.cu_mask);
|
||||
pqn->q->properties.cu_mask = NULL;
|
||||
uninit_queue(pqn->q);
|
||||
@@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
|
||||
q = pqn->q;
|
||||
switch (q->properties.type) {
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
seq_printf(m, " SDMA queue on device %x\n",
|
||||
q->device->id);
|
||||
mqd_type = KFD_MQD_TYPE_SDMA;
|
||||
@@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
|
||||
q->properties.type, q->device->id);
|
||||
continue;
|
||||
}
|
||||
mqd_mgr = q->device->dqm->ops.get_mqd_manager(
|
||||
q->device->dqm, mqd_type);
|
||||
mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
|
||||
} else if (pqn->kq) {
|
||||
q = pqn->kq->queue;
|
||||
mqd_mgr = pqn->kq->mqd_mgr;
|
||||
@@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
seq_printf(m, " DIQ on device %x\n",
|
||||
pqn->kq->dev->id);
|
||||
mqd_type = KFD_MQD_TYPE_HIQ;
|
||||
break;
|
||||
default:
|
||||
seq_printf(m,
|
||||
|
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
dev->node_props.lds_size_in_kb);
|
||||
sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
|
||||
dev->node_props.gds_size_in_kb);
|
||||
sysfs_show_32bit_prop(buffer, "num_gws",
|
||||
dev->node_props.num_gws);
|
||||
sysfs_show_32bit_prop(buffer, "wave_front_size",
|
||||
dev->node_props.wave_front_size);
|
||||
sysfs_show_32bit_prop(buffer, "array_count",
|
||||
@@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
dev->node_props.drm_render_minor);
|
||||
sysfs_show_64bit_prop(buffer, "hive_id",
|
||||
dev->node_props.hive_id);
|
||||
sysfs_show_32bit_prop(buffer, "num_sdma_engines",
|
||||
dev->node_props.num_sdma_engines);
|
||||
sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
|
||||
dev->node_props.num_sdma_xgmi_engines);
|
||||
|
||||
if (dev->gpu) {
|
||||
log_max_watch_addr =
|
||||
@@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
|
||||
local_mem_info.local_mem_size_public;
|
||||
|
||||
buf[0] = gpu->pdev->devfn;
|
||||
buf[1] = gpu->pdev->subsystem_vendor;
|
||||
buf[2] = gpu->pdev->subsystem_device;
|
||||
buf[1] = gpu->pdev->subsystem_vendor |
|
||||
(gpu->pdev->subsystem_device << 16);
|
||||
buf[2] = pci_domain_nr(gpu->pdev->bus);
|
||||
buf[3] = gpu->pdev->device;
|
||||
buf[4] = gpu->pdev->bus->number;
|
||||
buf[5] = lower_32_bits(local_mem_size);
|
||||
@@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
gpu->shared_resources.drm_render_minor;
|
||||
|
||||
dev->node_props.hive_id = gpu->hive_id;
|
||||
dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
|
||||
dev->node_props.num_sdma_xgmi_engines =
|
||||
gpu->device_info->num_xgmi_sdma_engines;
|
||||
dev->node_props.num_gws = (hws_gws_support &&
|
||||
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
|
||||
amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
|
||||
|
||||
kfd_fill_mem_clk_max_info(dev);
|
||||
kfd_fill_iolink_non_crat_info(dev);
|
||||
@@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
pr_debug("Adding doorbell packet type capability\n");
|
||||
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
|
||||
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
|
||||
|
@@ -65,6 +65,7 @@ struct kfd_node_properties {
|
||||
uint32_t max_waves_per_simd;
|
||||
uint32_t lds_size_in_kb;
|
||||
uint32_t gds_size_in_kb;
|
||||
uint32_t num_gws;
|
||||
uint32_t wave_front_size;
|
||||
uint32_t array_count;
|
||||
uint32_t simd_arrays_per_engine;
|
||||
@@ -78,6 +79,8 @@ struct kfd_node_properties {
|
||||
uint32_t max_engine_clk_fcompute;
|
||||
uint32_t max_engine_clk_ccompute;
|
||||
int32_t drm_render_minor;
|
||||
uint32_t num_sdma_engines;
|
||||
uint32_t num_sdma_xgmi_engines;
|
||||
uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user