Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next

New stuff for 5.3:
- Add new thermal sensors for vega asics
- Various RAS fixes
- Add sysfs interface for memory interface utilization
- Use HMM rather than mmu notifier for user pages
- Expose xgmi topology via kfd
- SR-IOV fixes
- Fixes for manual driver reload
- Add unique identifier for vega asics
- Clean up user fence handling with UVD/VCE/VCN blocks
- Convert DC to use core bpc attribute rather than a custom one
- Add GWS support for KFD
- Vega powerplay improvements
- Add CRC support for DCE 12
- SR-IOV support for new security policy
- Various cleanups

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie
2019-05-31 09:33:29 +10:00
202 changed files with 6597 additions and 2659 deletions

View File

@@ -21,7 +21,7 @@
*/
static const uint32_t cwsr_trap_gfx8_hex[] = {
0xbf820001, 0xbf82012b,
0xbf820001, 0xbf820121,
0xb8f4f802, 0x89748674,
0xb8f5f803, 0x8675ff75,
0x00000400, 0xbf850017,
@@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x8671ff71, 0x0000ffff,
0x8f728374, 0xb972e0c2,
0xbf800002, 0xb9740002,
0xbe801f70, 0xb8f5f803,
0x8675ff75, 0x00000100,
0xbf840006, 0xbefa0080,
0xb97a0203, 0x8671ff71,
0x0000ffff, 0x80f08870,
0x82f18071, 0xbefa0080,
0xbe801f70, 0xbefa0080,
0xb97a0283, 0xbef60068,
0xbef70069, 0xb8fa1c07,
0x8e7a9c7a, 0x87717a71,
@@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf820001, 0xbf82015d,
0xbf820001, 0xbf82015e,
0xb8f8f802, 0x89788678,
0xb8f1f803, 0x866eff71,
0x00000400, 0xbf850037,
0x866eff71, 0x00000800,
0xbf850003, 0x866eff71,
0x00000100, 0xbf840008,
0xb8fbf803, 0x866eff7b,
0x00000400, 0xbf85003b,
0x866eff7b, 0x00000800,
0xbf850003, 0x866eff7b,
0x00000100, 0xbf84000c,
0x866eff78, 0x00002000,
0xbf840001, 0xbf810000,
0xbf840005, 0xbf8e0010,
0xb8eef803, 0x866eff6e,
0x00000400, 0xbf84fffb,
0x8778ff78, 0x00002000,
0x80ec886c, 0x82ed806d,
0xb8eef807, 0x866fff6e,
@@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x8977ff77, 0xfc000000,
0x87776f77, 0x896eff6e,
0x001f8000, 0xb96ef807,
0xb8f0f812, 0xb8f1f813,
0x8ef08870, 0xc0071bb8,
0xb8faf812, 0xb8fbf813,
0x8efa887a, 0xc0071bbd,
0x00000000, 0xbf8cc07f,
0xc0071c38, 0x00000008,
0xc0071ebd, 0x00000008,
0xbf8cc07f, 0x86ee6e6e,
0xbf840001, 0xbe801d6e,
0xb8f1f803, 0x8671ff71,
0xb8fbf803, 0x867bff7b,
0x000001ff, 0xbf850002,
0x806c846c, 0x826d806d,
0x866dff6d, 0x0000ffff,
@@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x8f6e8378, 0xb96ee0c2,
0xbf800002, 0xb9780002,
0xbe801f6c, 0x866dff6d,
0x0000ffff, 0xbef00080,
0xb9700283, 0xb8f02407,
0x8e709c70, 0x876d706d,
0xb8f003c7, 0x8e709b70,
0x876d706d, 0xb8f0f807,
0x8670ff70, 0x00007fff,
0xb970f807, 0xbeee007e,
0x0000ffff, 0xbefa0080,
0xb97a0283, 0xb8fa2407,
0x8e7a9b7a, 0x876d7a6d,
0xb8fa03c7, 0x8e7a9a7a,
0x876d7a6d, 0xb8faf807,
0x867aff7a, 0x00007fff,
0xb97af807, 0xbeee007e,
0xbeef007f, 0xbefe0180,
0xbf900004, 0x87708478,
0xb970f802, 0xbf8e0002,
0xbf88fffe, 0xb8f02a05,
0xbf900004, 0x877a8478,
0xb97af802, 0xbf8e0002,
0xbf88fffe, 0xb8fa2a05,
0x807a817a, 0x8e7a8a7a,
0xb8fb1605, 0x807b817b,
0x8e7b867b, 0x807a7b7a,
0x807a7e7a, 0x827b807f,
0x867bff7b, 0x0000ffff,
0xc04b1c3d, 0x00000050,
0xbf8cc07f, 0xc04b1d3d,
0x00000060, 0xbf8cc07f,
0xc0431e7d, 0x00000074,
0xbf8cc07f, 0xbef4007e,
0x8675ff7f, 0x0000ffff,
0x8775ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x00807fac, 0x867aff7f,
0x08000000, 0x8f7a837a,
0x87777a77, 0x867aff7f,
0x70000000, 0x8f7a817a,
0x87777a77, 0xbef1007c,
0xbef00080, 0xb8f02a05,
0x80708170, 0x8e708a70,
0xb8f11605, 0x80718171,
0x8e718671, 0x80707170,
0x80707e70, 0x8271807f,
0x8671ff71, 0x0000ffff,
0xc0471cb8, 0x00000040,
0xbf8cc07f, 0xc04b1d38,
0x00000048, 0xbf8cc07f,
0xc0431e78, 0x00000058,
0xbf8cc07f, 0xc0471eb8,
0x0000005c, 0xbf8cc07f,
0xb8fa1605, 0x807a817a,
0x8e7a867a, 0x80707a70,
0xbef60084, 0xbef600ff,
0x01000000, 0xbefe007c,
0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611e3a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xb8fbf803,
0xbefe007c, 0xbefc0070,
0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
0x80708470, 0xbefc007e,
0xbefe007c, 0xbefc0070,
0xc0611a7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0xb8f1f801,
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
0xbefc007e, 0x867aff7f,
0x04000000, 0xbeef0080,
0x876f6f7a, 0xb8f02a05,
0x80708170, 0x8e708a70,
0xb8fb1605, 0x807b817b,
0x8e7b847b, 0x8e76827b,
0xbef600ff, 0x01000000,
0xbef20174, 0x80747074,
0x82758075, 0xbefc0080,
0xbf800000, 0xbe802b00,
0xbe822b02, 0xbe842b04,
0xbe862b06, 0xbe882b08,
0xbe8a2b0a, 0xbe8c2b0c,
0xbe8e2b0e, 0xc06b003a,
0x00000000, 0xbf8cc07f,
0xc06b013a, 0x00000010,
0xbf8cc07f, 0xc06b023a,
0x00000020, 0xbf8cc07f,
0xc06b033a, 0x00000030,
0xbf8cc07f, 0x8074c074,
0x82758075, 0x807c907c,
0xbf0a7b7c, 0xbf85ffe7,
0xbef40172, 0xbef00080,
0xbefe00c1, 0xbeff00c1,
0xbee80080, 0xbee90080,
0xbef600ff, 0x01000000,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
0xb8fb4306, 0x867bc17b,
0xbf84002c, 0xbf8a0000,
0x867aff6f, 0x04000000,
0xbf840028, 0x8e7b867b,
0x8e7b827b, 0xbef6007b,
0xb8f02a05, 0x80708170,
0x8e708a70, 0xb8fa1605,
0x807a817a, 0x8e7a867a,
0x80707a70, 0x8070ff70,
0x00000080, 0xbef600ff,
0x01000000, 0xbefc0080,
0xd28c0002, 0x000100c1,
0xd28d0003, 0x000204c1,
0xd1060002, 0x00011103,
0x7e0602ff, 0x00000200,
0xbefc00ff, 0x00010000,
0xbe800077, 0x8677ff77,
0xff7fffff, 0x8777ff77,
0x00058000, 0xd8ec0000,
0x00000002, 0xbf8cc07f,
0xe0765000, 0x701d0002,
0x68040702, 0xd0c9006a,
0x0000f702, 0xbf87fff7,
0xbef70000, 0xbef000ff,
0x00000400, 0xbefe00c1,
0xbeff00c1, 0xb8fb2a05,
0x807b817b, 0x8e7b827b,
0x8e76887b, 0xbef600ff,
0x01000000, 0xbefc0084,
0xbf0a7b7c, 0xbf840015,
0xbf11017c, 0x807bff7b,
0x00001000, 0x7e000300,
0x7e020301, 0x7e040302,
0x7e060303, 0xe0724000,
0x701d0000, 0xe0724100,
0x701d0100, 0xe0724200,
0x701d0200, 0xe0724300,
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
0xbf9c0000, 0xbf8200da,
0xbef4007e, 0x8675ff7f,
0x0000ffff, 0x8775ff75,
0x00040000, 0xbef60080,
0xbef700ff, 0x00807fac,
0x8670ff7f, 0x08000000,
0x8f708370, 0x87777077,
0x8670ff7f, 0x70000000,
0x8f708170, 0x87777077,
0xbefb007c, 0xbefa0080,
0xb8fa2a05, 0x807a817a,
0x8e7a8a7a, 0xb8f01605,
0x80708170, 0x8e708670,
0x807a707a, 0xbef60084,
0xbef600ff, 0x01000000,
0xbefe007c, 0xbefc007a,
0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611b3a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611b7a, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611bba,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611bfa, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611e3a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xb8f1f803, 0xbefe007c,
0xbefc007a, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611a3a, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611a7a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xb8fbf801, 0xbefe007c,
0xbefc007a, 0xc0611efa,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0x8670ff7f, 0x04000000,
0xbeef0080, 0x876f6f70,
0xb8fa2a05, 0x807a817a,
0x8e7a8a7a, 0xb8f11605,
0x80718171, 0x8e718471,
0x8e768271, 0xbef600ff,
0x01000000, 0xbef20174,
0x80747a74, 0x82758075,
0xbefc0080, 0xbf800000,
0xbe802b00, 0xbe822b02,
0xbe842b04, 0xbe862b06,
0xbe882b08, 0xbe8a2b0a,
0xbe8c2b0c, 0xbe8e2b0e,
0xc06b003a, 0x00000000,
0xbf8cc07f, 0xc06b013a,
0x00000010, 0xbf8cc07f,
0xc06b023a, 0x00000020,
0xbf8cc07f, 0xc06b033a,
0x00000030, 0xbf8cc07f,
0x8074c074, 0x82758075,
0x807c907c, 0xbf0a717c,
0xbf85ffe7, 0xbef40172,
0xbefa0080, 0xbefe00c1,
0xbeff00c1, 0xbee80080,
0xbee90080, 0xbef600ff,
0x01000000, 0xe0724000,
0x7a1d0000, 0xe0724100,
0x7a1d0100, 0xe0724200,
0x7a1d0200, 0xe0724300,
0x7a1d0300, 0xbefe00c1,
0xbeff00c1, 0xb8f14306,
0x8671c171, 0xbf84002c,
0xbf8a0000, 0x8670ff6f,
0x04000000, 0xbf840028,
0x8e718671, 0x8e718271,
0xbef60071, 0xb8fa2a05,
0x807a817a, 0x8e7a8a7a,
0xb8f01605, 0x80708170,
0x8e708670, 0x807a707a,
0x807aff7a, 0x00000080,
0xbef600ff, 0x01000000,
0xbefc0080, 0xd28c0002,
0x000100c1, 0xd28d0003,
0x000204c1, 0xd1060002,
0x00011103, 0x7e0602ff,
0x00000200, 0xbefc00ff,
0x00010000, 0xbe800077,
0x8677ff77, 0xff7fffff,
0x8777ff77, 0x00058000,
0xd8ec0000, 0x00000002,
0xbf8cc07f, 0xe0765000,
0x7a1d0002, 0x68040702,
0xd0c9006a, 0x0000e302,
0xbf87fff7, 0xbef70000,
0xbefa00ff, 0x00000400,
0xbefe00c1, 0xbeff00c1,
0xb8f12a05, 0x80718171,
0x8e718271, 0x8e768871,
0xbef600ff, 0x01000000,
0xbefc0084, 0xbf0a717c,
0xbf840015, 0xbf11017c,
0x8071ff71, 0x00001000,
0x7e000300, 0x7e020301,
0x7e040302, 0x7e060303,
0xe0724000, 0x7a1d0000,
0xe0724100, 0x7a1d0100,
0xe0724200, 0x7a1d0200,
0xe0724300, 0x7a1d0300,
0x807c847c, 0x807aff7a,
0x00000400, 0xbf0a717c,
0xbf85ffef, 0xbf9c0000,
0xbf8200dc, 0xbef4007e,
0x8675ff7f, 0x0000ffff,
0x8775ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x00807fac, 0x866eff7f,
0x08000000, 0x8f6e836e,
0x87776e77, 0x866eff7f,
0x70000000, 0x8f6e816e,
0x87776e77, 0x866eff7f,
0x04000000, 0xbf84001e,
0xbefe00c1, 0xbeff00c1,
0xb8ef4306, 0x866fc16f,
0xbf840019, 0x8e6f866f,
0x8e6f826f, 0xbef6006f,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
0x80786e78, 0x8078ff78,
0x00000080, 0xbef600ff,
0x01000000, 0xbefc0080,
0xe0510000, 0x781d0000,
0xe0510100, 0x781d0000,
0x807cff7c, 0x00000200,
0x8078ff78, 0x00000200,
0xbf0a6f7c, 0xbf85fff6,
0xbef80080, 0xbefe00c1,
0xbeff00c1, 0xb8ef2a05,
0x806f816f, 0x8e6f826f,
0x8e76886f, 0xbef600ff,
0x01000000, 0xbeee0078,
0x8078ff78, 0x00000400,
0xbefc0084, 0xbf11087c,
0x806fff6f, 0x00008000,
0xe0524000, 0x781d0000,
0xe0524100, 0x781d0100,
0xe0524200, 0x781d0200,
0xe0524300, 0x781d0300,
0xbf8c0f70, 0x7e000300,
0x7e020301, 0x7e040302,
0x7e060303, 0x807c847c,
0x8078ff78, 0x00000400,
0xbf0a6f7c, 0xbf85ffee,
0xbf9c0000, 0xe0524000,
0x6e1d0000, 0xe0524100,
0x6e1d0100, 0xe0524200,
0x6e1d0200, 0xe0524300,
0x6e1d0300, 0xb8f82a05,
0x866eff7f, 0x08000000,
0x8f6e836e, 0x87776e77,
0x866eff7f, 0x70000000,
0x8f6e816e, 0x87776e77,
0x866eff7f, 0x04000000,
0xbf84001e, 0xbefe00c1,
0xbeff00c1, 0xb8ef4306,
0x866fc16f, 0xbf840019,
0x8e6f866f, 0x8e6f826f,
0xbef6006f, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
0x80f8c078, 0xb8ef1605,
0x806f816f, 0x8e6f846f,
0x8e76826f, 0xbef600ff,
0x01000000, 0xbefc006f,
0xc031003a, 0x00000078,
0x80f8c078, 0xbf8cc07f,
0x80fc907c, 0xbf800000,
0xbe802d00, 0xbe822d02,
0xbe842d04, 0xbe862d06,
0xbe882d08, 0xbe8a2d0a,
0xbe8c2d0c, 0xbe8e2d0e,
0xbf06807c, 0xbf84fff0,
0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
0xbefc0080, 0xe0510000,
0x781d0000, 0xe0510100,
0x781d0000, 0x807cff7c,
0x00000200, 0x8078ff78,
0x00000200, 0xbf0a6f7c,
0xbf85fff6, 0xbef80080,
0xbefe00c1, 0xbeff00c1,
0xb8ef2a05, 0x806f816f,
0x8e6f826f, 0x8e76886f,
0xbef600ff, 0x01000000,
0xbeee0078, 0x8078ff78,
0x00000400, 0xbefc0084,
0xbf11087c, 0x806fff6f,
0x00008000, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
0x7e000300, 0x7e020301,
0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
0xbf85ffee, 0xbf9c0000,
0xe0524000, 0x6e1d0000,
0xe0524100, 0x6e1d0100,
0xe0524200, 0x6e1d0200,
0xe0524300, 0x6e1d0300,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
0x80786e78, 0xbef60084,
0x80786e78, 0x80f8c078,
0xb8ef1605, 0x806f816f,
0x8e6f846f, 0x8e76826f,
0xbef600ff, 0x01000000,
0xc0211bfa, 0x00000078,
0x80788478, 0xc0211b3a,
0xbefc006f, 0xc031003a,
0x00000078, 0x80f8c078,
0xbf8cc07f, 0x80fc907c,
0xbf800000, 0xbe802d00,
0xbe822d02, 0xbe842d04,
0xbe862d06, 0xbe882d08,
0xbe8a2d0a, 0xbe8c2d0c,
0xbe8e2d0e, 0xbf06807c,
0xbf84fff0, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
0xbef60084, 0xbef600ff,
0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
0xc0211b7a, 0x00000078,
0x80788478, 0xc0211eba,
0xc0211b3a, 0x00000078,
0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
0xc0211efa, 0x00000078,
0x80788478, 0xc0211c3a,
0xc0211c3a, 0x00000078,
0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
0xc0211c7a, 0x00000078,
0x80788478, 0xc0211a3a,
0xc0211eba, 0x00000078,
0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
0xc0211a7a, 0x00000078,
0x80788478, 0xc0211cfa,
0xc0211a3a, 0x00000078,
0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
0xbf8cc07f, 0xbefc006f,
0xbefe007a, 0xbeff007b,
0x866f71ff, 0x000003ff,
0xb96f4803, 0x866f71ff,
0xfffff800, 0x8f6f8b6f,
0xb96fa2c3, 0xb973f801,
0xb8ee2a05, 0x806e816e,
0x8e6e8a6e, 0xb8ef1605,
0x806f816f, 0x8e6f866f,
0x806e6f6e, 0x806e746e,
0x826f8075, 0x866fff6f,
0x0000ffff, 0xc0071cb7,
0x00000040, 0xc00b1d37,
0x00000048, 0xc0031e77,
0x00000058, 0xc0071eb7,
0x0000005c, 0xbf8cc07f,
0x866fff6d, 0xf0000000,
0x8f6f9c6f, 0x8e6f906f,
0xbeee0080, 0x876e6f6e,
0x866fff6d, 0x08000000,
0x8f6f9b6f, 0x8e6f8f6f,
0x876e6f6e, 0x866fff70,
0x00800000, 0x8f6f976f,
0xb96ef807, 0x866dff6d,
0x0000ffff, 0x86fe7e7e,
0x86ea6a6a, 0x8f6e8370,
0xb96ee0c2, 0xbf800002,
0xb9700002, 0xbf8a0000,
0x95806f6c, 0xbf810000,
0xc0211cfa, 0x00000078,
0x80788478, 0xbf8cc07f,
0xbefc006f, 0xbefe0070,
0xbeff0071, 0x866f7bff,
0x000003ff, 0xb96f4803,
0x866f7bff, 0xfffff800,
0x8f6f8b6f, 0xb96fa2c3,
0xb973f801, 0xb8ee2a05,
0x806e816e, 0x8e6e8a6e,
0xb8ef1605, 0x806f816f,
0x8e6f866f, 0x806e6f6e,
0x806e746e, 0x826f8075,
0x866fff6f, 0x0000ffff,
0xc00b1c37, 0x00000050,
0xc00b1d37, 0x00000060,
0xc0031e77, 0x00000074,
0xbf8cc07f, 0x866fff6d,
0xf8000000, 0x8f6f9b6f,
0x8e6f906f, 0xbeee0080,
0x876e6f6e, 0x866fff6d,
0x04000000, 0x8f6f9a6f,
0x8e6f8f6f, 0x876e6f6e,
0x866fff7a, 0x00800000,
0x8f6f976f, 0xb96ef807,
0x866dff6d, 0x0000ffff,
0x86fe7e7e, 0x86ea6a6a,
0x8f6e837a, 0xb96ee0c2,
0xbf800002, 0xb97a0002,
0xbf8a0000, 0x95806f6c,
0xbf810000, 0x00000000,
};

View File

@@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
end
//check whether there is mem_viol
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
s_cbranch_scc0 L_NO_PC_REWIND
//if so, need rewind PC assuming GDS operation gets NACKed
s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
L_NO_PC_REWIND:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit

View File

@@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
@@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
var s_save_tmp = ttmp4
var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
var s_save_tmp = ttmp14
var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
var s_save_xnack_mask_lo = ttmp6
var s_save_xnack_mask_hi = ttmp7
var s_save_buf_rsrc0 = ttmp8
@@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
var s_save_buf_rsrc2 = ttmp10
var s_save_buf_rsrc3 = ttmp11
var s_save_status = ttmp12
var s_save_mem_offset = ttmp14
var s_save_mem_offset = ttmp4
var s_save_alloc_size = s_save_trapsts //conflict
var s_save_m0 = ttmp15
var s_save_m0 = ttmp5
var s_save_ttmps_lo = s_save_tmp //no conflict
var s_save_ttmps_hi = s_save_trapsts //no conflict
@@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
var s_restore_pc_lo = ttmp0
var s_restore_pc_hi = ttmp1
var s_restore_exec_lo = ttmp14
var s_restore_exec_hi = ttmp15
var s_restore_status = ttmp4
var s_restore_trapsts = ttmp5
var s_restore_exec_lo = ttmp4
var s_restore_exec_hi = ttmp5
var s_restore_status = ttmp14
var s_restore_trapsts = ttmp15
var s_restore_xnack_mask_lo = xnack_mask_lo
var s_restore_xnack_mask_hi = xnack_mask_hi
var s_restore_buf_rsrc0 = ttmp8
@@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
L_HALT_WAVE:
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
// We cannot prevent further faults so just terminate the wavefront.
// We cannot prevent further faults. Spin wait until context saved.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_ALREADY_HALTED
s_endpgm
L_WAIT_CTX_SAVE:
s_sleep 0x10
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
s_cbranch_scc0 L_WAIT_CTX_SAVE
L_NOT_ALREADY_HALTED:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
@@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
@@ -405,7 +411,7 @@ end
else
end
// Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_save_ttmps_lo)
get_sgpr_size_bytes(s_save_ttmps_hi)
@@ -413,13 +419,11 @@ end
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
ack_sqc_store_workaround()
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
ack_sqc_store_workaround()
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
ack_sqc_store_workaround()
s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
ack_sqc_store_workaround()
/* setup Resource Contants */
@@ -1093,7 +1097,7 @@ end
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo)
get_sgpr_size_bytes(s_restore_ttmps_hi)
@@ -1101,10 +1105,9 @@ end
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
//reuse s_restore_m0 as a temp register

View File

@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
else
return -ENOTSUPP;
@@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process_device *pdd;
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
if (!dev)
return -EINVAL;
mutex_lock(&p->mutex);
@@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE)
return -EINVAL;
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset)
return -ENOMEM;
}
mutex_lock(&p->mutex);
@@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
args->mmap_offset = offset;
/* MMIO is mapped through kfd device
* Generate a kfd mmap offset
*/
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
args->mmap_offset <<= PAGE_SHIFT;
}
return 0;
err_free:
@@ -1551,6 +1567,32 @@ copy_from_user_failed:
return err;
}
static int kfd_ioctl_alloc_queue_gws(struct file *filep,
struct kfd_process *p, void *data)
{
int retval;
struct kfd_ioctl_alloc_queue_gws_args *args = data;
struct kfd_dev *dev;
if (!hws_gws_support)
return -EINVAL;
dev = kfd_device_by_id(args->gpu_id);
if (!dev) {
pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
return -EINVAL;
}
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
return -EINVAL;
mutex_lock(&p->mutex);
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
mutex_unlock(&p->mutex);
args->first_gws = 0;
return retval;
}
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
kfd_ioctl_import_dmabuf, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
kfd_ioctl_alloc_queue_gws, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1845,6 +1889,39 @@ err_i1:
return retcode;
}
static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
int ret;
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pr_debug("Process %d mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
process->pasid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
ret = io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
PAGE_SIZE,
vma->vm_page_prot);
return ret;
}
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
@@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (!dev)
return -ENODEV;
return kfd_reserved_mem_mmap(dev, process, vma);
case KFD_MMAP_TYPE_MMIO:
if (!dev)
return -ENODEV;
return kfd_mmio_mmap(dev, process, vma);
}
return -EFAULT;

View File

@@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
@@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
props->weight = 15;
props->weight = 15 * iolink->num_hops_xgmi;
else
props->weight = node_distance(id_from, id_to);
@@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris12_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
break;
case CHIP_VEGAM:
pcache_info = vegam_cache_info;
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
@@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
struct kfd_dev *kdev,
struct kfd_dev *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
@@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
sub_type_hdr->num_hops_xgmi =
amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
return 0;
}
@@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
(char *)sub_type_hdr +
sizeof(struct crat_subtype_iolink));
ret = kfd_fill_gpu_xgmi_link_to_gpu(
&avail_size, kdev,
&avail_size, kdev, peer_dev->gpu,
(struct crat_subtype_iolink *)sub_type_hdr,
proximity_domain, nid);
if (ret < 0)

View File

@@ -274,7 +274,8 @@ struct crat_subtype_iolink {
uint32_t minimum_bandwidth_mbs;
uint32_t maximum_bandwidth_mbs;
uint32_t recommended_transfer_size;
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH];
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
uint8_t num_hops_xgmi;
};
/*

View File

@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = true,
.num_sdma_engines = 1,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
#endif
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
static const struct kfd_device_info vegam_device_info = {
.asic_family = CHIP_VEGAM,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
.doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.supports_cwsr = true,
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
};
@@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
{ 0x694C, &vegam_device_info }, /* VegaM */
{ 0x694E, &vegam_device_info }, /* VegaM */
{ 0x694F, &vegam_device_info }, /* VegaM */
{ 0x6860, &vega10_device_info }, /* Vega10 */
{ 0x6861, &vega10_device_info }, /* Vega10 */
{ 0x6862, &vega10_device_info }, /* Vega10 */
@@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} else
kfd->max_proc_per_quantum = hws_max_conc_proc;
/* Allocate global GWS that is shared by all KFD processes */
if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
dev_err(kfd_device, "Could not allocate %d gws\n",
amdgpu_amdkfd_get_num_gws(kfd->kgd));
goto out;
}
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
@@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
goto out;
goto alloc_gtt_mem_failure;
}
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -611,6 +653,9 @@ kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
if (hws_gws_support)
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
if (hws_gws_support)
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
}
kfree(kfd);

View File

@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int sdma_queue_id);
struct queue *q);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
if (type == KFD_QUEUE_TYPE_SDMA)
if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
return KFD_MQD_TYPE_SDMA;
return KFD_MQD_TYPE_CP;
}
@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
return dqm->dev->device_info->num_sdma_engines;
}
static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_xgmi_sdma_engines;
}
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_sdma_engines
* dqm->dev->device_info->num_sdma_queues_per_engine;
}
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_xgmi_sdma_engines
* dqm->dev->device_info->num_sdma_queues_per_engine;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
* preserve the user mode ABI.
*/
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
* doorbell assignments based on the engine and queue id.
* The doobell index distance between RLC (2*i) and (2*i+1)
@@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
struct kfd_dev *dev = qpd->dqm->dev;
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
return;
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
retval = create_compute_queue_nocpsch(dqm, q, qpd);
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
else
retval = -EINVAL;
@@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
@@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (!mqd_mgr)
return -ENOMEM;
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
retval = allocate_hqd(dqm, q);
if (retval)
@@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
int retval;
struct mqd_manager *mqd_mgr;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr)
return -ENOMEM;
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
deallocate_hqd(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
deallocate_sdma_queue(dqm, q);
} else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
@@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = -ENODEV;
goto out_unlock;
}
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto out_unlock;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
}
} else if (prev_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
if (WARN(q->process->mm != current->mm,
"should only run in user thread"))
retval = -EFAULT;
@@ -571,27 +584,6 @@ out_unlock:
return retval;
}
static struct mqd_manager *get_mqd_manager(
struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
struct mqd_manager *mqd_mgr;
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
pr_debug("mqd type %d\n", type);
mqd_mgr = dqm->mqd_mgrs[type];
if (!mqd_mgr) {
mqd_mgr = mqd_manager_init(type, dqm->dev);
if (!mqd_mgr)
pr_err("mqd manager is NULL");
dqm->mqd_mgrs[type] = mqd_mgr;
}
return mqd_mgr;
}
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_active)
continue;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) { /* should not be here */
pr_err("Cannot evict queue, mqd mgr is NULL\n");
retval = -ENOMEM;
goto out;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_evicted = true;
q->properties.is_active = false;
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
@@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_evicted)
continue;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) { /* should not be here */
pr_err("Cannot restore queue, mqd mgr is NULL\n");
retval = -ENOMEM;
goto out;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_evicted = false;
q->properties.is_active = true;
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
@@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++;
kfd_inc_compute_active(dqm->dev);
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
kfd_inc_compute_active(dqm->dev);
return retval;
}
@@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
kfd_dec_compute_active(dqm->dev);
goto out;
}
}
@@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
retval = 1;
out:
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
if (!retval)
kfd_dec_compute_active(dqm->dev);
return retval;
}
@@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
dqm->xgmi_sdma_queue_count = 0;
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
}
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
return 0;
}
@@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int *sdma_queue_id)
struct queue *q)
{
int bit;
if (dqm->sdma_bitmap == 0)
return -ENOMEM;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (dqm->sdma_bitmap == 0)
return -ENOMEM;
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
q->properties.sdma_engine_id = q->sdma_id %
get_num_sdma_engines(dqm);
q->properties.sdma_queue_id = q->sdma_id /
get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (dqm->xgmi_sdma_bitmap == 0)
return -ENOMEM;
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
/* sdma_engine_id is sdma id including
* both PCIe-optimized SDMAs and XGMI-
* optimized SDMAs. The calculation below
* assumes the first N engines are always
* PCIe-optimized ones
*/
q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
q->sdma_id % get_num_xgmi_sdma_engines(dqm);
q->properties.sdma_queue_id = q->sdma_id /
get_num_xgmi_sdma_engines(dqm);
}
bit = ffs(dqm->sdma_bitmap) - 1;
dqm->sdma_bitmap &= ~(1 << bit);
*sdma_queue_id = bit;
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
return 0;
}
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
unsigned int sdma_queue_id)
struct queue *q)
{
if (sdma_queue_id >= get_num_sdma_queues(dqm))
return;
dqm->sdma_bitmap |= (1 << sdma_queue_id);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (q->sdma_id >= get_num_sdma_queues(dqm))
return;
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
return;
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
}
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
if (!mqd_mgr)
return -ENOMEM;
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
retval = allocate_sdma_queue(dqm, &q->sdma_id);
retval = allocate_sdma_queue(dqm, q);
if (retval)
return retval;
q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
retval = allocate_doorbell(qpd, q);
if (retval)
goto out_deallocate_sdma_queue;
pr_debug("SDMA id is: %d\n", q->sdma_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
@@ -987,7 +1002,7 @@ out_uninit_mqd:
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
return retval;
}
@@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->processes_count = 0;
dqm->sdma_queue_count = 0;
dqm->xgmi_sdma_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
@@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
int retval;
struct mqd_manager *mqd_mgr;
retval = 0;
dqm_lock(dqm);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
pr_warn("Can't create new usermode queue because %d queues were already created\n",
dqm->total_queue_count);
retval = -EPERM;
goto out_unlock;
goto out;
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
retval = allocate_sdma_queue(dqm, &q->sdma_id);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
retval = allocate_sdma_queue(dqm, q);
if (retval)
goto out_unlock;
q->properties.sdma_queue_id =
q->sdma_id / get_num_sdma_engines(dqm);
q->properties.sdma_engine_id =
q->sdma_id % get_num_sdma_engines(dqm);
goto out;
}
retval = allocate_doorbell(qpd, q);
if (retval)
goto out_deallocate_sdma_queue;
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (retval)
goto out_deallocate_doorbell;
dqm_lock(dqm);
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
@@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
* type or whether the queue is active.
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
dqm_unlock(dqm);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
out:
return retval;
}
@@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
return 0;
}
static int unmap_sdma_queues(struct device_queue_manager *dqm,
unsigned int sdma_engine)
static int unmap_sdma_queues(struct device_queue_manager *dqm)
{
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
sdma_engine);
int i, retval = 0;
for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
if (retval)
return retval;
}
return retval;
}
/* dqm->lock mutex has to be locked before calling this function */
@@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
pr_debug("Before destroying queues, sdma queue count is : %u\n",
dqm->sdma_queue_count);
pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
if (dqm->sdma_queue_count > 0) {
unmap_sdma_queues(dqm, 0);
unmap_sdma_queues(dqm, 1);
}
if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
unmap_sdma_queues(dqm);
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
filter, filter_param, false, 0);
@@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
}
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto failed;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
deallocate_doorbell(qpd, q);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
deallocate_sdma_queue(dqm, q);
}
list_del(&q->list);
@@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = true;
}
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
/*
* Unconditionally decrement this counter, regardless of the queue's
* type
@@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
return retval;
failed:
failed_try_destroy_debugged_queue:
dqm_unlock(dqm);
@@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct queue *q, *next;
struct device_process_node *cur, *next_dpn;
int retval = 0;
bool found = false;
dqm_lock(dqm);
@@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
kfd_dec_compute_active(dqm->dev);
found = true;
break;
}
}
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
if (found)
kfd_dec_compute_active(dqm->dev);
return retval;
}
@@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
goto dqm_unlock;
}
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
if (!mqd_mgr) {
r = -ENOMEM;
goto dqm_unlock;
}
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
if (!mqd_mgr->get_wave_state) {
r = -EINVAL;
@@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
struct device_process_node *cur, *next_dpn;
enum kfd_unmap_queues_filter filter =
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
bool found = false;
retval = 0;
@@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
deallocate_sdma_queue(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm->xgmi_sdma_queue_count--;
deallocate_sdma_queue(dqm, q);
}
if (q->properties.is_active)
@@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
kfd_dec_compute_active(dqm->dev);
found = true;
break;
}
}
@@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = false;
}
/* lastly, free mqd resources */
dqm_unlock(dqm);
/* Outside the DQM lock because under the DQM lock we can't do
* reclaim or take other locks that others hold while reclaiming.
*/
if (found)
kfd_dec_compute_active(dqm->dev);
/* Lastly, free mqd resources.
* Do uninit_mqd() after dqm_unlock to avoid circular locking.
*/
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd_mgr) {
retval = -ENOMEM;
goto out;
}
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
list_del(&q->list);
qpd->queue_count--;
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
}
out:
dqm_unlock(dqm);
return retval;
}
static int init_mqd_managers(struct device_queue_manager *dqm)
{
int i, j;
struct mqd_manager *mqd_mgr;
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
if (!mqd_mgr) {
pr_err("mqd manager [%d] initialization failed\n", i);
goto out_free;
}
dqm->mqd_mgrs[i] = mqd_mgr;
}
return 0;
out_free:
for (j = 0; j < i; j++) {
kfree(dqm->mqd_mgrs[j]);
dqm->mqd_mgrs[j] = NULL;
}
return -ENOMEM;
}
/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
int retval;
struct kfd_dev *dev = dqm->dev;
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
dev->device_info->num_sdma_engines *
dev->device_info->num_sdma_queues_per_engine +
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
(void *)&(mem_obj->cpu_ptr), true);
return retval;
}
@@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.stop = stop_cpsch;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.get_mqd_manager = get_mqd_manager;
dqm->ops.register_process = register_process;
dqm->ops.unregister_process = unregister_process;
dqm->ops.uninitialize = uninitialize;
@@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.get_mqd_manager = get_mqd_manager;
dqm->ops.register_process = register_process;
dqm->ops.unregister_process = unregister_process;
dqm->ops.initialize = initialize_nocpsch;
@@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
@@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
goto out_free;
}
if (init_mqd_managers(dqm))
goto out_free;
if (allocate_hiq_sdma_mqd(dqm)) {
pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
goto out_free;
}
if (!dqm->ops.initialize(dqm))
return dqm;
@@ -1772,9 +1843,17 @@ out_free:
return NULL;
}
void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
dqm->ops.uninitialize(dqm);
deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
kfree(dqm);
}

View File

@@ -48,8 +48,6 @@ struct device_process_node {
*
* @update_queue: Queue update routine.
*
* @get_mqd_manager: Returns the mqd manager according to the mqd type.
*
* @exeute_queues: Dispatches the queues list to the H/W.
*
* @register_process: This routine associates a specific process with device.
@@ -97,10 +95,6 @@ struct device_queue_manager_ops {
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
struct mqd_manager * (*get_mqd_manager)
(struct device_queue_manager *dqm,
enum KFD_MQD_TYPE type);
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
};
/**
@@ -185,10 +181,12 @@ struct device_queue_manager {
unsigned int processes_count;
unsigned int queue_count;
unsigned int sdma_queue_count;
unsigned int xgmi_sdma_queue_count;
unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
unsigned int sdma_bitmap;
uint64_t sdma_bitmap;
uint64_t xgmi_sdma_bitmap;
unsigned int vmid_bitmap;
uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem;
@@ -201,6 +199,7 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
};
void device_queue_manager_init_cik(
@@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{

View File

@@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik;
asic_ops->init_sdma_vm = init_sdma_vm;
asic_ops->mqd_manager_init = mqd_manager_init_cik;
}
void device_queue_manager_init_cik_hawaii(
@@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik_hawaii;
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)

View File

@@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
{
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
}
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)

View File

@@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
asic_ops->update_qpd = update_qpd_vi;
asic_ops->init_sdma_vm = init_sdma_vm;
asic_ops->mqd_manager_init = mqd_manager_init_vi;
}
void device_queue_manager_init_vi_tonga(
@@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
asic_ops->update_qpd = update_qpd_vi_tonga;
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)

View File

@@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
return; /* Presumably process exited. */
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = 1;
memory_exception_data.failure.imprecise = true;
/* Set failure reason */
if (info) {
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;

View File

@@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
break;
case CHIP_VEGA10:
@@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
return 0;
}

View File

@@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->nop_packet = nop.u32all;
switch (type) {
case KFD_QUEUE_TYPE_DIQ:
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
break;
case KFD_QUEUE_TYPE_HIQ:
kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
KFD_MQD_TYPE_HIQ);
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
pr_err("Invalid queue type %d\n", type);
@@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
kernel_queue_init_vi(&kq->ops_asic_specific);
break;

View File

@@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
packet->bitfields2.alloc_format =
alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_compute_vi;
@@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
@@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;

View File

@@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
packet->bitfields2.alloc_format =
alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
@@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
@@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;

View File

@@ -23,34 +23,54 @@
#include "kfd_mqd_manager.h"
#include "amdgpu_amdkfd.h"
#include "kfd_device_queue_manager.h"
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev)
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
{
switch (dev->device_info->asic_family) {
case CHIP_KAVERI:
return mqd_manager_init_cik(type, dev);
case CHIP_HAWAII:
return mqd_manager_init_cik_hawaii(type, dev);
case CHIP_CARRIZO:
return mqd_manager_init_vi(type, dev);
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
return mqd_manager_init_vi_tonga(type, dev);
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
case CHIP_RAVEN:
return mqd_manager_init_v9(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
}
struct kfd_mem_obj *mqd_mem_obj = NULL;
return NULL;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
return mqd_mem_obj;
}
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj = NULL;
uint64_t offset;
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
offset = (q->sdma_engine_id *
dev->device_info->num_sdma_queues_per_engine +
q->sdma_queue_id) *
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset);
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
return mqd_mem_obj;
}
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
WARN_ON(!mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
}
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,

View File

@@ -99,8 +99,16 @@ struct mqd_manager {
struct mutex mqd_mutex;
struct kfd_dev *dev;
uint32_t mqd_size;
};
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
struct queue_properties *q);
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj);
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);

View File

@@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
if (q->type == KFD_QUEUE_TYPE_HIQ)
return allocate_hiq_mqd(kfd);
if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
&mqd_mem_obj))
return NULL;
return mqd_mem_obj;
}
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
uint64_t addr;
struct cik_mqd *m;
int retval;
struct kfd_dev *kfd = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_mqd(kfd, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct cik_sdma_rlc_registers *m;
struct kfd_dev *dev = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct cik_sdma_rlc_registers),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
@@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, struct queue_properties *p,
@@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif

View File

@@ -67,6 +67,43 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
int retval;
struct kfd_mem_obj *mqd_mem_obj = NULL;
if (q->type == KFD_QUEUE_TYPE_HIQ)
return allocate_hiq_mqd(kfd);
/* From V9, for CWSR, the control stack is located on the next page
* boundary after the mqd, we will use the gtt allocation function
* instead of sub-allocation function.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
if (!mqd_mem_obj)
return NULL;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&(mqd_mem_obj->gtt_mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
} else {
retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
&mqd_mem_obj);
}
if (retval) {
kfree(mqd_mem_obj);
return NULL;
}
return mqd_mem_obj;
}
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -76,24 +113,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
struct v9_mqd *m;
struct kfd_dev *kfd = mm->dev;
/* From V9, for CWSR, the control stack is located on the next page
* boundary after the mqd, we will use the gtt allocation function
* instead of sub-allocation function.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!*mqd_mem_obj)
return -ENOMEM;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
&((*mqd_mem_obj)->gtt_mem),
&((*mqd_mem_obj)->gpu_addr),
(void *)&((*mqd_mem_obj)->cpu_ptr), true);
} else
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_mqd(kfd, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct v9_sdma_mqd *m;
struct kfd_dev *dev = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct v9_sdma_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
return retval;
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->get_wave_state = get_wave_state;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif

View File

@@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
struct kfd_mem_obj *mqd_mem_obj;
if (q->type == KFD_QUEUE_TYPE_HIQ)
return allocate_hiq_mqd(kfd);
if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
&mqd_mem_obj))
return NULL;
return mqd_mem_obj;
}
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
int retval;
uint64_t addr;
struct vi_mqd *m;
struct kfd_dev *kfd = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_mqd(kfd, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct vi_sdma_mqd *m;
struct kfd_dev *dev = mm->dev;
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct vi_sdma_mqd),
mqd_mem_obj);
if (retval != 0)
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
memset(m, 0, sizeof(struct vi_sdma_mqd));
*mqd = m;
if (gart_addr != NULL)
if (gart_addr)
*gart_addr = (*mqd_mem_obj)->gpu_addr;
retval = mm->update_mqd(mm, m, q);
@@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
return retval;
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->get_wave_state = get_wave_state;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_DIQ:
mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_sdma;
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif

View File

@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->queue_count;
compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
pm->dqm->xgmi_sdma_queue_count;
/* check if there is over subscription
* Note: the arbitration between the number of VMIDs and
@@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
pm->pmf = &kfd_vi_pm_funcs;
break;
case CHIP_VEGA10:

View File

@@ -176,8 +176,7 @@ struct pm4_mes_map_process {
union {
struct {
uint32_t num_gws:6;
uint32_t reserved7:1;
uint32_t num_gws:7;
uint32_t sdma_enable:1;
uint32_t num_oac:4;
uint32_t reserved8:4;
@@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
};
enum mes_map_queues_alloc_format_enum {
alloc_format__mes_map_queues__one_per_pipe_vi = 0,
alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
};
enum mes_map_queues_engine_sel_enum {
engine_sel__mes_map_queues__compute_vi = 0,
engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
struct {
uint32_t reserved1:4;
enum mes_map_queues_queue_sel_enum queue_sel:2;
uint32_t reserved2:15;
uint32_t reserved5:6;
uint32_t gws_control_queue:1;
uint32_t reserved2:8;
enum mes_map_queues_queue_type_enum queue_type:3;
enum mes_map_queues_alloc_format_enum alloc_format:2;
uint32_t reserved3:2;
enum mes_map_queues_engine_sel_enum engine_sel:3;
uint32_t num_queues:3;
} bitfields2;

View File

@@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
};
enum mes_map_queues_alloc_format_vi_enum {
alloc_format__mes_map_queues__one_per_pipe_vi = 0,
alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
};
enum mes_map_queues_engine_sel_vi_enum {
engine_sel__mes_map_queues__compute_vi = 0,
engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
uint32_t reserved2:15;
enum mes_map_queues_queue_type_vi_enum queue_type:3;
enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
uint32_t reserved3:2;
enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
uint32_t num_queues:3;
} bitfields2;

View File

@@ -59,6 +59,7 @@
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
@@ -160,6 +161,11 @@ extern int noretry;
*/
extern int halt_if_hws_hang;
/*
* Whether MEC FW support GWS barriers
*/
extern bool hws_gws_support;
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
@@ -188,6 +194,7 @@ struct kfd_device_info {
bool needs_iommu_device;
bool needs_pci_atomics;
unsigned int num_sdma_engines;
unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine;
};
@@ -258,7 +265,7 @@ struct kfd_dev {
bool interrupts_active;
/* Debug manager */
struct kfd_dbgmgr *dbgmgr;
struct kfd_dbgmgr *dbgmgr;
/* Firmware versions */
uint16_t mec_fw_version;
@@ -282,6 +289,9 @@ struct kfd_dev {
/* Compute Profile ref. count */
atomic_t compute_profile;
/* Global GWS resource shared b/t processes*/
void *gws;
};
enum kfd_mempool {
@@ -329,7 +339,8 @@ enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
KFD_QUEUE_TYPE_HIQ,
KFD_QUEUE_TYPE_DIQ
KFD_QUEUE_TYPE_DIQ,
KFD_QUEUE_TYPE_SDMA_XGMI
};
enum kfd_queue_format {
@@ -444,6 +455,9 @@ struct queue_properties {
*
* @device: The kfd device that created this queue.
*
* @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
* otherwise.
*
* This structure represents user mode compute queues.
* It contains all the necessary data to handle such queues.
*
@@ -465,6 +479,7 @@ struct queue {
struct kfd_process *process;
struct kfd_dev *device;
void *gws;
};
/*
@@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
KFD_MQD_TYPE_HIQ, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
KFD_MQD_TYPE_SDMA, /* for sdma queues */
KFD_MQD_TYPE_DIQ, /* for diq */
KFD_MQD_TYPE_MAX
};
@@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
@@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,

View File

@@ -26,6 +26,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
pdd->already_dequeued = true;
}
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
{
struct kfd_dev *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
struct kgd_mem *mem = NULL;
int ret;
pqn = get_queue_by_qid(pqm, qid);
if (!pqn) {
pr_err("Queue id does not match any known queue\n");
return -EINVAL;
}
if (pqn->q)
dev = pqn->q->device;
if (WARN_ON(!dev))
return -ENODEV;
pdd = kfd_get_process_device_data(dev, pqm->process);
if (!pdd) {
pr_err("Process device data doesn't exist\n");
return -EINVAL;
}
/* Only allow one queue per process can have GWS assigned */
if (gws && pdd->qpd.num_gws)
return -EINVAL;
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem);
else
ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
pqn->q->gws);
if (unlikely(ret))
return ret;
pqn->q->gws = mem;
pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
struct kfd_process_device *pdd;
@@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
pr_err("Over-subscription is not allowed for SDMA.\n");
case KFD_QUEUE_TYPE_SDMA_XGMI:
if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
>= get_num_sdma_queues(dev->dqm)) ||
(type == KFD_QUEUE_TYPE_SDMA_XGMI &&
dev->dqm->xgmi_sdma_queue_count
>= get_num_xgmi_sdma_queues(dev->dqm))) {
pr_debug("Over-subscription is not allowed for SDMA.\n");
retval = -EPERM;
goto err_create_queue;
}
@@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != -ETIME)
goto err_destroy_queue;
}
if (pqn->q->gws) {
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
pdd->qpd.num_gws = 0;
}
kfree(pqn->q->properties.cu_mask);
pqn->q->properties.cu_mask = NULL;
uninit_queue(pqn->q);
@@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
q = pqn->q;
switch (q->properties.type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
seq_printf(m, " SDMA queue on device %x\n",
q->device->id);
mqd_type = KFD_MQD_TYPE_SDMA;
@@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
q->properties.type, q->device->id);
continue;
}
mqd_mgr = q->device->dqm->ops.get_mqd_manager(
q->device->dqm, mqd_type);
mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
} else if (pqn->kq) {
q = pqn->kq->queue;
mqd_mgr = pqn->kq->mqd_mgr;
@@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
case KFD_QUEUE_TYPE_DIQ:
seq_printf(m, " DIQ on device %x\n",
pqn->kq->dev->id);
mqd_type = KFD_MQD_TYPE_HIQ;
break;
default:
seq_printf(m,

View File

@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.lds_size_in_kb);
sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
dev->node_props.gds_size_in_kb);
sysfs_show_32bit_prop(buffer, "num_gws",
dev->node_props.num_gws);
sysfs_show_32bit_prop(buffer, "wave_front_size",
dev->node_props.wave_front_size);
sysfs_show_32bit_prop(buffer, "array_count",
@@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.drm_render_minor);
sysfs_show_64bit_prop(buffer, "hive_id",
dev->node_props.hive_id);
sysfs_show_32bit_prop(buffer, "num_sdma_engines",
dev->node_props.num_sdma_engines);
sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
dev->node_props.num_sdma_xgmi_engines);
if (dev->gpu) {
log_max_watch_addr =
@@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
local_mem_info.local_mem_size_public;
buf[0] = gpu->pdev->devfn;
buf[1] = gpu->pdev->subsystem_vendor;
buf[2] = gpu->pdev->subsystem_device;
buf[1] = gpu->pdev->subsystem_vendor |
(gpu->pdev->subsystem_device << 16);
buf[2] = pci_domain_nr(gpu->pdev->bus);
buf[3] = gpu->pdev->device;
buf[4] = gpu->pdev->bus->number;
buf[5] = lower_32_bits(local_mem_size);
@@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
gpu->shared_resources.drm_render_minor;
dev->node_props.hive_id = gpu->hive_id;
dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
dev->node_props.num_sdma_xgmi_engines =
gpu->device_info->num_xgmi_sdma_engines;
dev->node_props.num_gws = (hws_gws_support &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
@@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
pr_debug("Adding doorbell packet type capability\n");
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &

View File

@@ -65,6 +65,7 @@ struct kfd_node_properties {
uint32_t max_waves_per_simd;
uint32_t lds_size_in_kb;
uint32_t gds_size_in_kb;
uint32_t num_gws;
uint32_t wave_front_size;
uint32_t array_count;
uint32_t simd_arrays_per_engine;
@@ -78,6 +79,8 @@ struct kfd_node_properties {
uint32_t max_engine_clk_fcompute;
uint32_t max_engine_clk_ccompute;
int32_t drm_render_minor;
uint32_t num_sdma_engines;
uint32_t num_sdma_xgmi_engines;
uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};