#!/usr/bin/perl
#/************************************************************************
# * LVS weight and paging control script for SCF's Virtual Computer Lab *
# * Version: 0.3.6                                                       *
# * written by: Joseph T. Duncan                                         *
# * duncanjo@ucs.orst.edu                                                *
# ************************************************************************/
# changelog:
# 0.3.7:   updates to make paging with t-mobile happy
# 0.3.6:   realserver up/down and bulk of paging code for failure of a
#          realserver added (you need your own external monitoring for if
#          the loadbalancer(s) dies)
# 0.3.5:   rewrite using Hash of Hashes for primary data structure
#          (solves 0.3.2 bug with clean approach)
# 0.3.3/4: abstraction added to make configuration/moving easier
# 0.3.2:   bug found/fixed but with some ugly code (syncing data sources bug)

#/*****************
# * rrd skeletons *
# *****************/
# rrd for loadbalancer
#rrdtool create lvs_box_dns_name.rrd --start N --step 60 DS:Proc_0:GAUGE:120:-1:101 DS:Proc_1:GAUGE:120:-1:101 DS:avg_cpu_load:GAUGE:120:-1:101 DS:num_users:GAUGE:120:-1:256 DS:free_mem:GAUGE:120:-1:8193 DS:inet_recieved:GAUGE:120:-1:1000000000 DS:inet_sent:GAUGE:120:0:1000000000 RRA:AVERAGE:0.5:1:262800 RRA:AVERAGE:0.5:6:43800 RRA:AVERAGE:0.5:24:109050 RRA:AVERAGE:0.5:288:913
# rrd for ts boxes
#rrdtool create ts_box(s)_dns_name.rrd --start N --step 60 DS:Proc_0:GAUGE:120:-1:101 DS:Proc_1:GAUGE:120:-1:101 DS:Proc_2:GAUGE:120:-1:101 DS:Proc_3:GAUGE:120:-1:101 DS:avg_cpu_load:GAUGE:120:-1:101 DS:num_users:GAUGE:120:-1:256 DS:free_mem:GAUGE:120:-1:8193 DS:inet_recieved:GAUGE:120:-1:1000000000 DS:inet_sent:GAUGE:120:0:1000000000 RRA:AVERAGE:0.5:1:262800 RRA:AVERAGE:0.5:6:43800 RRA:AVERAGE:0.5:24:109050 RRA:AVERAGE:0.5:288:913

#/******************
# * page skeletons *
# ******************/
# echo '0' > ts_box(s)_dns_name.state
# (optional: a missing or unreadable state file is treated as 0 and the file
#  is created the first time a state is written)

# NOTE: the word "strict" on the original shebang line was not the strict
# pragma; strictures are enabled explicitly here.
use strict;
use warnings;
use Net::Ping;

#/*****************
# * CONFIGURATION *
# *****************/
# START CONFIGURATION SECTION
my $ipvsadm   = "/sbin/ipvsadm";       # path to the ipvsadm executable
my $snmpget   = "/usr/bin/snmpget";    # path to the snmpget executable
my $snmpwalk  = "/usr/bin/snmpwalk";   # path to the snmpwalk executable
my $community = "password";            # snmp community name
my $rrd       = "/usr/bin/rrdtool";    # path to the rrdtool executable
my $rrd_dir   = "/usr/local/stats/";   # directory where rrd and .state files live
my $mail      = "/usr/lib/sendmail";   # path to sendmail for pages
# Single quotes on purpose: in a double-quoted string "@bar" is interpolated
# as an array and the address is silently truncated.
my $pagelist  = 'foo@bar';             # page recipients, comma delimited if multiple
my $uri       = "http://foobar.com";   # webpage to include with email

# Timestamp used in page subjects.  The trailing newline must be removed,
# otherwise it terminates the mail header block in the middle of Subject:.
my $date = `/bin/date`;
$date = localtime() unless defined $date && length $date;
chomp $date;

my $load_balancer    = "dns name";         # hostname of your load balancer, should be your VIP!
my $load_balancer_ip = "000.000.000.000";  # IP address of your load balancer *DEPRECATED: resolve from hostname*
my $service_port     = 3389;               # TCP port of the balanced service (0D3D in /proc/net/ip_vs)
my $num_realservers  = 1;                  # total number of real servers to manage
my @host    = ("A.foobar.com", "B.foobar.com", "C.foobar.com");           # hostnames of each real server, should be your RIP(s)!
my @host_ip = ("000.000.000.003", "000.000.000.002", "000.000.000.1");    # IP address of each real server *DEPRECATED: resolve from hostname*
# END CONFIGURATION SECTION

#/***********
# * HELPERS *
# ***********/

# Return the leading numeric part of $value, or 0 when it is undefined or
# does not start with a number (mirrors Perl's own numeric coercion, but
# without emitting warnings).
sub _num {
    my ($value) = @_;
    return 0 unless defined $value;
    return $value =~ /^\s*(-?\d+(?:\.\d+)?)/ ? $1 + 0 : 0;
}

# Return $value if it is a plain number, otherwise 'U', which rrdtool
# records as an unknown sample instead of rejecting the whole update.
sub _rrd_field {
    my ($value) = @_;
    return ( defined $value && $value =~ /^-?\d+(?:\.\d+)?$/ ) ? $value : 'U';
}

# Read a whole file and return its content; returns '' (with a warning)
# when the file cannot be opened.
sub _slurp {
    my ($path) = @_;
    my $fh;
    unless ( open( $fh, '<', $path ) ) {
        warn "cannot read $path: $!\n";
        return '';
    }
    local $/;
    my $text = <$fh>;
    close($fh);
    return defined $text ? $text : '';
}

# Sum whitespace-separated fields 2 and 3 of one line of mpstat output.
# NOTE(review): presumably %user + %nice; the column layout depends on the
# installed mpstat version and locale - verify.
sub _sum_fields_2_3 {
    my ($line) = @_;
    return 0 unless defined $line;
    my @field = split( /\s+/, $line );
    return _num( $field[2] ) + _num( $field[3] );
}

# Convert a dotted-quad IP into the "HEXIP:HEXPORT" key format used by
# /proc/net/ip_vs, e.g. ("192.168.0.3", 3389) -> "C0A80003:0D3D".
# $port defaults to the configured $service_port.
sub ip_to_hex_key {
    my ( $ip, $port ) = @_;
    $port = $service_port unless defined $port;
    my $hex_ip = join( '', map { sprintf( '%02X', $_ ) } split( /\./, $ip, 4 ) );
    return $hex_ip . ':' . sprintf( '%04X', $port );
}

# Extract the real-server ("  -> ...") rows from the text of /proc/net/ip_vs.
# Returns a list of hashrefs with keys hex_ip, weight, active and inactive,
# in file order.  Rows are matched by pattern rather than by counting
# space-separated tokens, because the columns are space padded and the token
# positions move as soon as a value gains a digit.
sub parse_ip_vs {
    my ($text) = @_;
    my @dests;
    for my $line ( split( /\n/, defined $text ? $text : '' ) ) {
        next
          unless $line =~
          /^\s*->\s+([0-9A-Fa-f]{8}:[0-9A-Fa-f]{4})\s+\S+\s+(-?\d+)\s+(\d+)\s+(\d+)/;
        push @dests,
          { hex_ip => uc($1), weight => $2, active => $3, inactive => $4 };
    }
    return @dests;
}

# Return MemTotal - MemFree (in the units /proc/meminfo reports) from the
# text of /proc/meminfo; missing entries count as 0.
sub parse_meminfo_used {
    my ($text) = @_;
    $text = '' unless defined $text;
    my $total = $text =~ /^MemTotal:\s+(\d+)/m ? $1 : 0;
    my $free  = $text =~ /^MemFree:\s+(\d+)/m  ? $1 : 0;
    return $total - $free;
}

# Read the consecutive-down counter stored in $path.  A missing, unreadable,
# empty or non-numeric file yields 0, so no state file has to be created by
# hand before the first run.
sub read_down_count {
    my ($path) = @_;
    my $fh;
    return 0 unless open( $fh, '<', $path );
    my $count = <$fh>;
    close($fh);
    return 0 unless defined $count;
    chomp $count;
    return $count =~ /^(\d+)$/ ? $1 + 0 : 0;
}

# Store the consecutive-down counter in $path (creating the file if needed).
# Failures are reported with a warning and do not abort the run.
sub write_down_count {
    my ( $path, $count ) = @_;
    my $fh;
    unless ( open( $fh, '>', $path ) ) {
        warn "cannot write $path: $!\n";
        return;
    }
    print {$fh} "$count";
    close($fh) or warn "cannot close $path: $!\n";
    return;
}

# Pipe a page to sendmail for $pagelist.  $subject is the Subject: text,
# @body are complete body lines; $uri is appended after them.
# Dies if the sendmail pipe cannot be opened (as the original did).
sub send_page {
    my ( $subject, @body ) = @_;
    open( my $mh, '|-', "$mail $pagelist" )
      or die("Could not execute\"$mail\"");
    print {$mh} "X-Mailer: Load Balancer at $load_balancer\n";
    print {$mh} "Subject: $subject\n";
    print {$mh} "From: root\@$load_balancer\n";
    print {$mh} "Precedence: bulk\n";
    print {$mh} "\n";    # explicit end of headers
    print {$mh} @body;
    print {$mh} "$uri";
    close($mh) or warn "$mail exited abnormally (status $?)\n";
    return;
}

# Run an snmp tool ($snmpget or $snmpwalk) against $ip for $oid and return
# its output split on single spaces and newlines (empty list on failure).
sub snmp_fields {
    my ( $tool, $ip, $oid ) = @_;
    my $output = `$tool -c $community -v 2c $ip $oid`;
    return () unless defined $output;
    return split( / |\n/, $output );
}

# Map average CPU load and free memory to an LVS weight:
#   CPU >= 75 and free memory <= 512  -> 1 (least new connections)
#   exactly one of the two conditions -> 3 (reduced new connections)
#   neither                           -> 9 (normal operation)
sub compute_new_weight {
    my ( $avg_cpu, $avg_free_mem ) = @_;
    my $cpu_high = $avg_cpu >= 75;
    my $mem_low  = $avg_free_mem <= 512;
    return 1 if $cpu_high && $mem_low;
    return 3 if $cpu_high || $mem_low;
    return 9;
}

#/**********************************************
# * LOCAL DATA collection (Load Balancer Data) *
# **********************************************/
# Fills $servers (hashref keyed by "HEXIP:HEXPORT") with total_con and
# current_weight for the first $num_realservers real servers listed in
# /proc/net/ip_vs, then stores the load balancer's own statistics in its rrd.
sub collect_local_stats {
    my ($servers) = @_;

    # cpu stats *semi bad code* currently only handles dual proc servers:
    # rows 3, 4 and 5 of the mpstat output are read as "all", cpu 0, cpu 1.
    my $mpstat = `/usr/bin/mpstat -P ALL`;
    my @cpustat = split( /\n/, defined $mpstat ? $mpstat : '' );
    my $cpu_av  = _sum_fields_2_3( $cpustat[3] );
    my $cpu_a   = _sum_fields_2_3( $cpustat[4] );
    my $cpu_b   = _sum_fields_2_3( $cpustat[5] );

    # used memory
    my $mem = parse_meminfo_used( _slurp('/proc/meminfo') );

    # connections and current weights per real server
    my @dests  = parse_ip_vs( _slurp('/proc/net/ip_vs') );
    my $wanted = $num_realservers < @dests ? $num_realservers : scalar @dests;
    my $user_total = 0;    # total connections through LVS //not exactly accurate//
    for my $dest ( @dests[ 0 .. $wanted - 1 ] ) {
        my $total_con = $dest->{'active'} + $dest->{'inactive'};
        $servers->{ $dest->{'hex_ip'} }{'total_con'}      = $total_con;
        $servers->{ $dest->{'hex_ip'} }{'current_weight'} = $dest->{'weight'};
        $user_total += $total_con;
    }

    # TODO: collect local network traffic through the load balancer
    # (non-critical, super low priority); the last two fields stay 0 until then.

    # NOTE(review): memory is written before the connection count, while the
    # rrd skeleton above lists num_users before free_mem.  The order is kept
    # exactly as the script always wrote it - confirm against the deployed rrd.
    system( $rrd, 'update', "$rrd_dir$load_balancer.rrd",
        "N:$cpu_a:$cpu_b:$cpu_av:$mem:$user_total:0:0" ) == 0
      or warn "rrd update for $load_balancer failed (status $?)\n";
    return;
}

#/*********************************************
# * REMOTE DATA collection (Real Server Data) *
# *********************************************/
# For each configured real server: record its identity in $servers, ping it,
# maintain its .state down counter, page on the 5th consecutive failed run and
# on recovery after 5 or more, and (when up) collect snmp data into its rrd.
sub collect_remote_stats {
    my ($servers) = @_;
    my $pinger = Net::Ping->new();

    for my $index ( 0 .. $num_realservers - 1 ) {
        my ( $name, $ip ) = ( $host[$index], $host_ip[$index] );
        unless ( defined $name && defined $ip ) {
            warn "real server #$index has no configured host/host_ip, skipped\n";
            next;
        }

        my $key = ip_to_hex_key($ip);
        $servers->{$key} ||= {};
        my $server = $servers->{$key};
        $server->{'host'}        = $name;
        $server->{'host_ip'}     = $ip;
        $server->{'host_ip_hex'} = $key;

        # Number of consecutive runs this host failed to answer.  The mails
        # report it as minutes, which holds if the script runs once a minute.
        my $state_path = $rrd_dir . $name . '.state';
        my $serverup   = read_down_count($state_path);

        if ( $pinger->ping($ip) ) {

            # host is up: clear a pending alarm state
            if ( $serverup >= 1 ) {
                if ( $serverup >= 5 ) {
                    send_page(
                        "$name up at $date after $serverup minutes",
                        "$name\'s services are back\n",
                        "$name was down for $serverup minutes\n",
                    );
                }
                write_down_count( $state_path, 0 );
            }

            # cpu stats - this needs to be updated to avoid the m$ magic
            # processor increment bug (processor instances are hard coded)
            my @cpu =
              map { ( snmp_fields( $snmpget, $ip, "HOST-RESOURCES-MIB::hrProcessorLoad.$_" ) )[3] }
              ( 5, 6, 3, 4 );
            $server->{'avg_cpu'} =
              ( _num( $cpu[0] ) + _num( $cpu[1] ) + _num( $cpu[2] ) + _num( $cpu[3] ) ) / 4;

            # free memory
            my @free_mem = snmp_fields( $snmpget, $ip, '1.3.6.1.4.1.9600.1.1.2.3.0' );
            $server->{'avg_free_mem'} = $free_mem[3];

            # network traffic in/out
            my @inet_recieved = snmp_fields( $snmpwalk, $ip, '1.3.6.1.4.1.9600.1.1.3.1.2' );
            my @inet_sent     = snmp_fields( $snmpwalk, $ip, '1.3.6.1.4.1.9600.1.1.3.1.3' );

            # NOTE(review): free memory precedes the connection count here,
            # unlike the rrd skeleton above - order kept as originally written.
            my $sample = join( ':',
                'N',
                ( map { _rrd_field($_) } @cpu[ 0 .. 3 ] ),
                _rrd_field( $server->{'avg_cpu'} ),
                _rrd_field( $free_mem[3] ),
                _rrd_field( $server->{'total_con'} ),
                _rrd_field( $inet_recieved[11] ),
                _rrd_field( $inet_sent[11] ),
            );
            system( $rrd, 'update', "$rrd_dir$name.rrd", $sample ) == 0
              or warn "rrd update for $name failed (status $?)\n";
        }
        else {

            # host is down: skip all snmp and logging
            if ( $serverup == 5 ) {
                send_page(
                    "$name down $date",
                    "$name has been down for 5 minutes\n",
                    "a further email when the host comes up and will note the duration of downtime\n",
                );
            }

            # bump the counter so the page is sent only once
            write_down_count( $state_path, $serverup + 1 );

            # Server is down ***add more robust logic for taking server out***
            $server->{'avg_cpu'}      = 101;
            $server->{'avg_free_mem'} = 0;
        }
    }

    $pinger->close();
    return;
}

#/****************************************************
# * CRITICAL CODE! Caution                           *
# * Calculate potential new weights for real servers *
# * Then Apply them!                                 *
# ****************************************************/
# Stores new_weight for every configured server in $servers and calls
# ipvsadm when it differs from the current weight.  Servers whose current
# weight is 0 or unknown are never touched.
sub apply_weights {
    my ($servers) = @_;

    for my $key ( keys %{$servers} ) {
        my $server = $servers->{$key};

        # Present in /proc/net/ip_vs but not in the configuration: there is
        # no address to pass to ipvsadm, so leave it alone.
        unless ( defined $server->{'host_ip'} ) {
            warn "real server $key is not configured, weight left unchanged\n";
            next;
        }

        $server->{'new_weight'} = compute_new_weight(
            _num( $server->{'avg_cpu'} ),
            _num( $server->{'avg_free_mem'} ),
        );

        my $current = $server->{'current_weight'};
        next unless defined $current && $current != 0;
        next if $current == $server->{'new_weight'};

        system(
            $ipvsadm, '-e',
            '-t', "$load_balancer_ip:$service_port",
            '-r', "$server->{'host_ip'}:$service_port",
            '-w', $server->{'new_weight'},
        ) == 0
          or warn "ipvsadm update for $server->{'host'} failed (status $?)\n";
    }
    return;
}

# Entry point: collect load balancer data, collect real server data, then
# adjust weights.
sub main {
    my %real_servers = ();    # hash of per-real-server data hashes
    collect_local_stats( \%real_servers );
    collect_remote_stats( \%real_servers );
    apply_weights( \%real_servers );
    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;

# TODO: add output to /var/www/htdocs for state information on which picture set to display