mirror of
https://github.com/torvalds/linux.git
synced 2024-11-01 04:53:36 +01:00
sch_cake: constify inverse square root cache
sch_cake uses a cache of the first 16 values of the inverse square root calculation for the Cobalt AQM to save some cycles on the fast path. This cache is populated when the qdisc is first loaded, but there's really no reason why it can't just be pre-populated. So change it to be pre-populated with constants, which also makes it possible to constify it. This gives a modest space saving for the module (not counting debug data): .text: -224 bytes .rodata: +80 bytes .bss: -64 bytes Total: -192 bytes Signed-off-by: Dave Taht <dave.taht@gmail.com> [ fixed up comment, rewrote commit message ] Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://patch.msgid.link/20240909091630.22177-1-toke@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
3f464b193d
commit
c48994baef
1 changed files with 18 additions and 35 deletions
|
@ -361,8 +361,24 @@ static const u8 besteffort[] = {
|
|||
static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
static const u8 bulk_order[] = {1, 0, 2, 3};
|
||||
|
||||
/* There is a big difference in timing between the accurate values placed in the
|
||||
* cache and the approximations given by a single Newton step for small count
|
||||
* values, particularly when stepping from count 1 to 2 or vice versa. Hence,
|
||||
* these values are calculated using eight Newton steps, using the
|
||||
* implementation below. Above 16, a single Newton step gives sufficient
|
||||
* accuracy in either direction, given the precision stored.
|
||||
*
|
||||
* The magnitude of the error when stepping up to count 2 is such as to give the
|
||||
* value that *should* have been produced at count 4.
|
||||
*/
|
||||
|
||||
#define REC_INV_SQRT_CACHE (16)
|
||||
static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
|
||||
static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
|
||||
~0, ~0, 3037000500, 2479700525,
|
||||
2147483647, 1920767767, 1753413056, 1623345051,
|
||||
1518500250, 1431655765, 1358187914, 1294981364,
|
||||
1239850263, 1191209601, 1147878294, 1108955788
|
||||
};
|
||||
|
||||
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
|
||||
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
|
||||
|
@ -388,47 +404,14 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
|
|||
static void cobalt_invsqrt(struct cobalt_vars *vars)
|
||||
{
|
||||
if (vars->count < REC_INV_SQRT_CACHE)
|
||||
vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
|
||||
vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
|
||||
else
|
||||
cobalt_newton_step(vars);
|
||||
}
|
||||
|
||||
/* There is a big difference in timing between the accurate values placed in
|
||||
* the cache and the approximations given by a single Newton step for small
|
||||
* count values, particularly when stepping from count 1 to 2 or vice versa.
|
||||
* Above 16, a single Newton step gives sufficient accuracy in either
|
||||
* direction, given the precision stored.
|
||||
*
|
||||
* The magnitude of the error when stepping up to count 2 is such as to give
|
||||
* the value that *should* have been produced at count 4.
|
||||
*/
|
||||
|
||||
static void cobalt_cache_init(void)
|
||||
{
|
||||
struct cobalt_vars v;
|
||||
|
||||
memset(&v, 0, sizeof(v));
|
||||
v.rec_inv_sqrt = ~0U;
|
||||
cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
|
||||
|
||||
for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
|
||||
cobalt_newton_step(&v);
|
||||
cobalt_newton_step(&v);
|
||||
cobalt_newton_step(&v);
|
||||
cobalt_newton_step(&v);
|
||||
|
||||
cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
|
||||
}
|
||||
}
|
||||
|
||||
static void cobalt_vars_init(struct cobalt_vars *vars)
|
||||
{
|
||||
memset(vars, 0, sizeof(*vars));
|
||||
|
||||
if (!cobalt_rec_inv_sqrt_cache[0]) {
|
||||
cobalt_cache_init();
|
||||
cobalt_rec_inv_sqrt_cache[0] = ~0;
|
||||
}
|
||||
}
|
||||
|
||||
/* CoDel control_law is t + interval/sqrt(count)
|
||||
|
|
Loading…
Reference in a new issue