MIPS: barrier: Add __SYNC() infrastructure

Introduce an asm/sync.h header which provides infrastructure that can be
used to generate sync instructions of various types, and for various
reasons. For example if we need a sync instruction that provides a full
completion barrier but only on systems which have weak memory ordering,
we can generate the appropriate assembly code using:

  __SYNC(full, weak_ordering)

When the kernel is configured to run on systems with weak memory
ordering (ie. CONFIG_WEAK_ORDERING is selected) we'll emit a sync
instruction. When the kernel is configured to run on systems with strong
memory ordering (ie. CONFIG_WEAK_ORDERING is not selected) we'll emit
nothing. The caller doesn't need to know which happened - it simply says
what it needs & when, with no concern for checking the kernel
configuration.

There are some scenarios in which we may want to emit code only when we
*didn't* emit a sync instruction. For example, some Loongson3 CPUs
suffer from a bug that requires us to emit a sync instruction prior to
each ll instruction (enabled by CONFIG_CPU_LOONGSON3_WORKAROUNDS). In
cases where this bug workaround is enabled, it's wasteful to then have
more generic code emit another sync instruction to provide barriers we
need in general. A __SYNC_ELSE() macro allows for this, providing an
extra argument that contains code to be assembled only in cases where
the sync instruction was not emitted. For example if we have a scenario
in which we generally want to emit a release barrier but for affected
Loongson3 configurations upgrade that to a full completion barrier, we
can do that like so:

  __SYNC_ELSE(full, loongson3_war, __SYNC(rl, always))

The assembly generated by these macros can be used either as inline
assembly or in assembly source files.

Differing types of sync as provided by MIPSr6 are defined, but currently
they all generate a full completion barrier except in kernels configured
for Cavium Octeon systems. There the wmb sync-type is used, and rmb
syncs are omitted, as has been the case since commit 6b07d38aaa
("MIPS: Octeon: Use optimized memory barrier primitives."). Using
__SYNC() with the wmb or rmb types will abstract away the Octeon
specific behavior and allow us to later clean up asm/barrier.h code that
currently includes a plethora of #ifdef's.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@vger.kernel.org
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: linux-kernel@vger.kernel.org
This commit is contained in:
Paul Burton
2019-10-01 21:53:07 +00:00
parent ef85d057a6
commit bf92927251
3 changed files with 219 additions and 121 deletions

View File

@@ -307,7 +307,7 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
}
/* Barrier ensuring previous cache invalidates are complete */
uasm_i_sync(pp, STYPE_SYNC);
uasm_i_sync(pp, __SYNC_full);
uasm_i_ehb(pp);
/* Check whether the pipeline stalled due to the FSB being full */
@@ -397,7 +397,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
if (coupled_coherence) {
/* Increment ready_count */
uasm_i_sync(&p, STYPE_SYNC_MB);
uasm_i_sync(&p, __SYNC_mb);
uasm_build_label(&l, p, lbl_incready);
uasm_i_ll(&p, t1, 0, r_nc_count);
uasm_i_addiu(&p, t2, t1, 1);
@@ -406,7 +406,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_addiu(&p, t1, t1, 1);
/* Barrier ensuring all CPUs see the updated r_nc_count value */
uasm_i_sync(&p, STYPE_SYNC_MB);
uasm_i_sync(&p, __SYNC_mb);
/*
* If this is the last VPE to become ready for non-coherence
@@ -473,7 +473,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
Index_Writeback_Inv_D, lbl_flushdcache);
/* Barrier ensuring previous cache invalidates are complete */
uasm_i_sync(&p, STYPE_SYNC);
uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
if (mips_cm_revision() < CM_REV_CM3) {
@@ -487,7 +487,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_lw(&p, t0, 0, r_pcohctl);
/* Barrier to ensure write to coherence control is complete */
uasm_i_sync(&p, STYPE_SYNC);
uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
}
@@ -534,7 +534,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
}
/* Barrier to ensure write to CPC command is complete */
uasm_i_sync(&p, STYPE_SYNC);
uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
}
@@ -572,13 +572,13 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_lw(&p, t0, 0, r_pcohctl);
/* Barrier to ensure write to coherence control is complete */
uasm_i_sync(&p, STYPE_SYNC);
uasm_i_sync(&p, __SYNC_full);
uasm_i_ehb(&p);
if (coupled_coherence && (state == CPS_PM_NC_WAIT)) {
/* Decrement ready_count */
uasm_build_label(&l, p, lbl_decready);
uasm_i_sync(&p, STYPE_SYNC_MB);
uasm_i_sync(&p, __SYNC_mb);
uasm_i_ll(&p, t1, 0, r_nc_count);
uasm_i_addiu(&p, t2, t1, -1);
uasm_i_sc(&p, t2, 0, r_nc_count);
@@ -586,7 +586,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1);
/* Barrier ensuring all CPUs see the updated r_nc_count value */
uasm_i_sync(&p, STYPE_SYNC_MB);
uasm_i_sync(&p, __SYNC_mb);
}
if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) {
@@ -608,7 +608,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
uasm_build_label(&l, p, lbl_secondary_cont);
/* Barrier ensuring all CPUs see the updated r_nc_count value */
uasm_i_sync(&p, STYPE_SYNC_MB);
uasm_i_sync(&p, __SYNC_mb);
}
/* The core is coherent, time to return to C code */