copypage_power7.S — POWER7-optimised full-page copy (PowerPC64 Linux kernel assembly)
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /*
  3. *
  4. * Copyright (C) IBM Corporation, 2012
  5. *
  6. * Author: Anton Blanchard <[email protected]>
  7. */
  8. #include <asm/page.h>
  9. #include <asm/ppc_asm.h>
_GLOBAL(copypage_power7)
	/*
	 * void copypage_power7(void *to, void *from)
	 *
	 * Copy one PAGE_SIZE page from 'from' (r4) to 'to' (r3).
	 * Both pointers are page aligned.
	 *
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1			/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01		/* depth=7
					 * units/cachelines=512 */
#else
	lis	r7,0x0E00		/* depth=7 */
	ori	r7,r7,0x1000		/* units/cachelines=32 */
#endif
	ori	r10,r7,1		/* stream=1 */

	lis	r8,0x8000		/* GO=1 */
	clrldi	r8,r8,32		/* keep only the GO bit in the low word */

	/* setup read stream 0 (the 'from' page) */
	dcbt	0,r4,0b01000		/* addr from */
	dcbt	0,r7,0b01010		/* length and depth from */
	/* setup write stream 1 (the 'to' page) */
	dcbtst	0,r9,0b01000		/* addr to */
	dcbtst	0,r10,0b01010		/* length and depth to */
	eieio				/* order stream setup before GO */
	dcbt	0,r8,0b01010		/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * Try the VMX copy. enter_vmx_ops returns 0 in r3 when VMX cannot
	 * be used, in which case we fall through to the integer copy below.
	 * Stash 'to'/'from' in our (not yet allocated) frame across the
	 * call, and save LR in the caller's frame as the ABI requires.
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)		/* LR save slot in caller's frame */
	stdu	r1,-STACKFRAMESIZE(r1)	/* allocate our frame */
	bl	enter_vmx_ops
	cmpwi	r3,0			/* r3 == 0 -> VMX unavailable */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)	/* restore 'to' */
	ld	r4,STK_REG(R30)(r1)	/* restore 'from' */
	mtlr	r0

	li	r0,(PAGE_SIZE/128)	/* copy 128 bytes per iteration */
	mtctr	r0

	beq	.Lnonvmx_copy		/* fallback keeps the frame allocated */

	addi	r1,r1,STACKFRAMESIZE	/* VMX path: pop our frame now */

	/* byte offsets of vectors 2..8 within each 128-byte chunk */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
	/* 128 bytes per iteration: 8 vector loads, then 8 vector stores */
1:	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	1b

	b	exit_vmx_ops		/* tail call optimise */
#else
	/* No AltiVec configured: set up the loop count and a frame for
	 * the non-volatile registers the integer copy uses. */
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/*
	 * Integer fallback: copy 128 bytes per iteration through
	 * r0, r5-r12 and r14-r20. r14-r20 are non-volatile under the
	 * ppc64 ABI, so save and restore them in our frame.
	 */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

1:	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	1b

	/* restore non-volatiles and pop our frame */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr