--- linux-2.4/include/net/ip_vs.h 2003-11-29 03:26:21.000000000 +0900 +++ linux-2.4.new/include/net/ip_vs.h 2004-02-23 14:59:35.000000000 +0900 @@ -26,6 +26,18 @@ #define IP_VS_DEST_F_AVAILABLE 0x0001 /* Available tag */ /* + * Advisory flags for slow start + * The absolute size of the weight change will be stored + * in dest->slow_start_data. + * The flag and slow_start_data may be used and modified by the scheduler + * to effect slow start + */ +#define IP_VS_DEST_F_WEIGHT_INC 0x0002 /* Weight has been increased */ +#define IP_VS_DEST_F_WEIGHT_DEC 0x0004 /* Weight has been decreased */ +#define IP_VS_DEST_F_WEIGHT_MASK \ + (IP_VS_DEST_F_WEIGHT_INC|IP_VS_DEST_F_WEIGHT_DEC) + +/* * IPVS sync daemon states */ #define IP_VS_STATE_NONE 0 /* daemon is stopped */ @@ -317,6 +329,7 @@ enum { NET_IPV4_VS_EXPIRE_NODEST_CONN=23, NET_IPV4_VS_SYNC_THRESHOLD=24, NET_IPV4_VS_NAT_ICMP_SEND=25, + NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26, NET_IPV4_VS_LAST }; @@ -498,6 +511,10 @@ struct ip_vs_dest { __u32 vaddr; /* IP address for virtual service */ __u16 vport; /* port number for the service */ __u32 vfwmark; /* firewall mark of the service */ + + /* for slow start */ + atomic_t slow_start_data; + atomic_t slow_start_data2; }; @@ -702,6 +719,7 @@ extern int sysctl_ip_vs_cache_bypass; extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_sync_threshold; extern int sysctl_ip_vs_nat_icmp_send; +extern int sysctl_ip_vs_expire_quiescent_template; extern struct ip_vs_stats ip_vs_stats; extern struct ip_vs_service *ip_vs_service_get(__u32 fwmark, --- linux-2.4/net/ipv4/ipvs/ip_vs_ctl.c 2003-11-29 03:26:21.000000000 +0900 +++ linux-2.4.new/net/ipv4/ipvs/ip_vs_ctl.c 2004-02-23 15:41:42.000000000 +0900 @@ -79,6 +79,7 @@ int sysctl_ip_vs_cache_bypass = 0; int sysctl_ip_vs_expire_nodest_conn = 0; int sysctl_ip_vs_sync_threshold = 3; int sysctl_ip_vs_nat_icmp_send = 0; +int sysctl_ip_vs_expire_quiescent_template = 0; #ifdef CONFIG_IP_VS_DEBUG static int
sysctl_ip_vs_debug_level = 0; @@ -670,6 +671,20 @@ static void __ip_vs_update_dest(struct i struct ip_vs_rule_user *ur) { int conn_flags; + int old_weight; + + /* Set hints for slow start: clear the old hint, then flag the direction of change */ + dest->flags |= IP_VS_DEST_F_WEIGHT_MASK; + dest->flags ^= IP_VS_DEST_F_WEIGHT_MASK; + old_weight = atomic_read(&dest->weight); + if (old_weight < ur->weight) { + atomic_set(&dest->slow_start_data, ur->weight - old_weight); + dest->flags |= IP_VS_DEST_F_WEIGHT_INC; + } + else if (old_weight > ur->weight) { + atomic_set(&dest->slow_start_data, old_weight - ur->weight); + dest->flags |= IP_VS_DEST_F_WEIGHT_DEC; + } /* * Set the weight and the flags @@ -1436,6 +1451,9 @@ static struct ip_vs_sysctl_table ipv4_vs {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send", &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template", + &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL, + &proc_dointvec}, {0}}, {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars}, {0}}, --- linux-2.4/net/ipv4/ipvs/ip_vs_wlc.c 2003-11-29 03:26:21.000000000 +0900 +++ linux-2.4.new/net/ipv4/ipvs/ip_vs_wlc.c 2004-02-23 15:48:45.000000000 +0900 @@ -51,9 +51,116 @@ ip_vs_wlc_update_svc(struct ip_vs_servic return 0; } +static void +ip_vs_wlc_set_slow_start(struct ip_vs_dest *dest, struct ip_vs_service *svc) +{ + __u32 ss_handicap; + __u32 ss_shift; + __u32 ndest; + __u32 w = 0; + __u32 dest_w = 0; + struct list_head *l, *e; + struct ip_vs_dest *d; + + /* If the weight was decreased or is zero there is no slow start: + * just set the slow_start data to zero as it won't be used */ + + if((dest->flags & IP_VS_DEST_F_WEIGHT_DEC) || + !(dest_w = atomic_read(&dest->weight))) { + IP_VS_DBG(1, "slow_start: null\n"); + atomic_set(&dest->slow_start_data, 0); + atomic_set(&dest->slow_start_data2, 0); + return; + } + + /* Calculate a weighted number of connections + * this server would have if all the currently + * active connections were redistributed limited
+ * to a maximum of 64k */ + l = &svc->destinations; + ss_handicap = 0; + ndest = 0; + for (e=l->next; e!=l; e=e->next) { + d = list_entry(e, struct ip_vs_dest, n_list); + w = atomic_read(&d->weight); + if (w < 1 || d == dest) { + continue; + } + ndest++; + + /* Cap conns and weight at 16 bits: the shift cannot overflow and the divisor is never zero */ + ss_shift = atomic_read(&d->activeconns); + if(ss_shift & 0xffff0000) + ss_shift = 0xffff; + ss_shift = (ss_shift << 16) / (w > 0xffff ? 0xffff : w); + + if (~(__u32)0 - ss_handicap < ss_shift) { + ss_handicap = ~(__u32)0; + break; + } + ss_handicap += ss_shift; + } + if (ndest) + ss_handicap = (ss_handicap * dest_w / ndest) >> 16; + + /* ss_shift = log_2((ss_handicap & 0xfff) >> 3) */ + if (ss_handicap) { + __u32 i; + ss_shift = ss_handicap; + for (i = 12; i > 0; i--) { + if(ss_shift & 0x8000) + break; + ss_shift <<= 1; + } + ss_shift = i; + ss_handicap <<= ss_shift; + } + else + ss_shift = 0; + + atomic_set(&dest->slow_start_data, ss_handicap); + atomic_set(&dest->slow_start_data2, ss_shift); + + IP_VS_DBG(1, "WLC slow_start_init: server %u.%u.%u.%u:%u " + "handicap=%u (%u) shift=%u ndest=%u\n", + NIPQUAD(dest->addr), ntohs(dest->port), + ss_handicap, ss_handicap >> ss_shift, ss_shift, ndest); +} + + + +static inline unsigned int +ip_vs_wlc_slowlstart_dest_handicap(struct ip_vs_dest *dest, + struct ip_vs_service *svc) +{ + unsigned int handicap; + + + /* Set up slow_start if weight has recently changed */ + if (unlikely(dest->flags & IP_VS_DEST_F_WEIGHT_MASK)) { + ip_vs_wlc_set_slow_start(dest, svc); + dest->flags |= IP_VS_DEST_F_WEIGHT_MASK; + dest->flags ^= IP_VS_DEST_F_WEIGHT_MASK; + } + + handicap = atomic_read(&dest->slow_start_data); + if (unlikely(!handicap)) + return 0; + + handicap--; + atomic_set(&dest->slow_start_data, handicap); + +#ifdef CONFIG_IP_VS_DEBUG + if (unlikely(!handicap)) + IP_VS_DBG(1, "WLC slow_start_end: server %u.%u.%u.%u:%u\n", + NIPQUAD(dest->addr), ntohs(dest->port)); +#endif + + return handicap >> atomic_read(&dest->slow_start_data2); +} static inline
unsigned int -ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) +ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest, struct ip_vs_service *svc) { /* * We think the overhead of processing active connections is 256 @@ -62,7 +169,8 @@ ip_vs_wlc_dest_overhead(struct ip_vs_des * use the following formula to estimate the overhead now: * dest->activeconns*256 + dest->inactconns */ - return (atomic_read(&dest->activeconns) << 8) + + return ((atomic_read(&dest->activeconns) + + ip_vs_wlc_slowlstart_dest_handicap(dest, svc)) << 8) + atomic_read(&dest->inactconns); } @@ -96,7 +204,7 @@ ip_vs_wlc_schedule(struct ip_vs_service for (e=l->next; e!=l; e=e->next) { least = list_entry(e, struct ip_vs_dest, n_list); if (atomic_read(&least->weight) > 0) { - loh = ip_vs_wlc_dest_overhead(least); + loh = ip_vs_wlc_dest_overhead(least, svc); goto nextstage; } } @@ -109,7 +217,7 @@ ip_vs_wlc_schedule(struct ip_vs_service for (e=e->next; e!=l; e=e->next) { dest = list_entry(e, struct ip_vs_dest, n_list); - doh = ip_vs_wlc_dest_overhead(dest); + doh = ip_vs_wlc_dest_overhead(dest, svc); if (loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) { least = dest;