add new scripts
Stephen Soltesz [Tue, 2 Mar 2010 19:30:13 +0000 (19:30 +0000)]
12 files changed:
statistics/functions.r
statistics/harvest_tt_resolve.py
statistics/node_availability.r [new file with mode: 0644]
statistics/node_history_all.r [new file with mode: 0644]
statistics/node_history_may0809.r
statistics/node_history_ttr.r [new file with mode: 0644]
statistics/node_status_jun09feb10.r [new file with mode: 0644]
statistics/operator_overhead.r [new file with mode: 0644]
statistics/rt_data.r
statistics/rt_data_opentickets.r [new file with mode: 0644]
statistics/rt_monitor_data.r
statistics/traffic_and_nodes.r [new file with mode: 0644]

index 3411586..bc6b8d4 100644 (file)
@@ -173,14 +173,15 @@ plot_rt_hist <- function (t, imagename=0)
     if ( imagename != 0 ) { end_image() }
 }
 
-year_hist <- function (t, year, from, to, max, type="week", title="Histogram for Tickets in")
+year_hist <- function (t, year, from, to, max, type="week", title="Histogram for Tickets in", fmt="%b-%d")
 {
     dates <-seq(as.Date(from), as.Date(to), type)
-    months <- format(dates, "%b-%d")
+    months <- format(dates, fmt)
     hbreaks<-unclass(as.POSIXct(dates))
     h<-hist(t$start, breaks=hbreaks, plot=FALSE)
     main<-sprintf(paste(title, "%s: MEAN %s\n"), year, mean(h$counts))
     print(main);
+    print(h$counts);
     if ( max == 0 ) {
         max = max(h$counts)
     }
@@ -190,6 +191,7 @@ year_hist <- function (t, year, from, to, max, type="week", title="Histogram for
     abline(mean(h$counts), 0, col='grey')
     #qqnorm(h$counts)
     #qqline(h$counts)
+    return (h);
 }
 
 year_hist_unique <- function (t, year, from, to, max, type="week", title="Histogram for Tickets in")
@@ -401,3 +403,9 @@ abline_at_date <- function (date, col='black', lty=1, format="%Y-%m-%d")
     abline(v=ts, col=col, lty=lty)
     return (ts);
 }
+
+tstamp <- function (date, format="%Y-%m-%d")
+{
+    ts <- unclass(as.POSIXct(date, format=format, origin="1970-01-01"))[1]
+    return (ts)
+}
index bebff32..c1295a0 100755 (executable)
@@ -37,9 +37,6 @@ count = 0
 for index,node in enumerate(HistoryNodeRecord.query.all()):
        frequency[node.hostname] = 0
 
-
-       #if index > 3: sys.exit(1)
-
        if node.hostname == 'planetlab-02.kyushu.jgn2.jp':
                for h in node.versions:
                        print h.last_checked, h.status
@@ -57,7 +54,6 @@ for index,node in enumerate(HistoryNodeRecord.query.all()):
                count += 1
                pairs.append((datetime.now(), node.versions[-1].last_checked))
        else:
-
                while i > 0:
                        i = find_next(node.versions, i, 'down')
                        i2 = find_next(node.versions, i, 'offline')
@@ -72,7 +68,6 @@ for index,node in enumerate(HistoryNodeRecord.query.all()):
                        frequency[node.hostname] += 1
 
        # list of all times
-
        for p in pairs:
                times.append(diff_time(p[0],p[1]))
 
diff --git a/statistics/node_availability.r b/statistics/node_availability.r
new file mode 100644 (file)
index 0000000..c155015
--- /dev/null
@@ -0,0 +1,269 @@
+source("functions.r");
+
+
+available_nodes <- function (ns, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # get range from ns
+        ns_sub <- ns[which(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & ns$status == 'BOOT'),]
+        nodes <- length(ns_sub$date)
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, nodes)
+
+    }
+    m<- months[1:length(months)-1]
+    return (rbind(xx,yy,m))
+}
+
+
+
+open_tickets <- function (t, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # identify any tickets with a start time in range, lastreply in range
+        # or where both start is less and lastreply is greater than the range
+        t_sub <- t[which( (t$start < hbreaks[i] & t$lastreply > hbreaks[i+1]) | 
+                          (t$start > hbreaks[i] & t$start <= hbreaks[i+1]) | 
+                          (t$lastreply > hbreaks[i] & t$lastreply <= hbreaks[i+1]) ),]
+        tickets <- length(t_sub$start)
+        #if ( nrow(t_sub) > 0 ){
+        #    for ( j in seq(1,nrow(t_sub)) )
+        #    {
+        #        #print(sprintf("id %s, date %s", t_sub[i,'ticket_id'], t_sub[i,'s1']))
+        #        print(sprintf("id %s, date %s", t_sub[j,]$ticket_id, t_sub[j, 's1']))
+        #    }
+        #}
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, tickets)
+
+    }
+    m<- months[1:length(months)-1]
+    return (rbind(xx,yy,m))
+}
+
+online_nodes <- function (fb)
+{
+    breaks <- unique(fb$timestamp)
+    n<-NULL
+    o<-NULL
+    x<-NULL
+    for (i in seq(1,length(breaks)) )
+    {
+        ts <- breaks[i]
+        sub <- fb[which(fb$timestamp == ts),]
+        node_count   <- length(unique(sub$hostname))
+        online_count <- length(unique(sub$hostname[which(sub$state=='BOOT')]))
+        x<-c(x,ts)
+        n<-c(n,node_count)
+        o<-c(o,online_count)
+    }
+    print(length(x))
+    print(length(n))
+    print(length(o))
+    return (rbind(x,n,o))
+}
+
+lowess_smooth <- function (x, y, delta=(60*60*24), f=0.02)
+{
+    a<-lowess(x, y, delta=delta, f=f)
+    return (a);
+}
+
+#####
+
+ns <- read.csv('node-status-jun09-feb10.csv', sep=',', header=TRUE)
+an <- available_nodes(ns, "2009-06-10", "2010-02-28", 'day')
+
+an_x<-an[1,][which(as.numeric(an[2,]) > 100)]
+an_y<-an[2,][which(as.numeric(an[2,]) > 100)]
+
+####
+#fb7 <- read.csv('findbad_raw_2007.csv', sep=',', header=TRUE)
+#fb8 <- read.csv('findbad_raw_2008.csv', sep=',', header=TRUE)
+#fb9 <- read.csv('findbad_raw_2009.csv', sep=',', header=TRUE)
+#fball <- rbind(fb7,fb8,fb9)
+
+z7<- online_nodes(fb7)
+z8<- online_nodes(fb8)
+z9<- online_nodes(fb9)
+
+zx <- c(z7[1,],z8[1,],z9[1,])
+zy_reg <- c(z7[2,], z8[2,],z9[2,])
+zy_avail <- c(z7[3,], z8[3,],z9[3,])
+
+start_image("node_availability.png")
+par(mfrow=c(2,1))
+par(mai=c(0.1,1,0.1,0.1))
+
+a_reg<-lowess_smooth(zx, zy_reg)
+plot(a_reg$x, a_reg$y, 
+     ylim=c(0,700), xlim=c(min(x1[length(x1)/2]), max(x1)), type='l', pch='.', axes=F,
+     ylab="Online Node Count", xlab="")
+       
+sx <- zx[which(zy_avail > 330)]
+sy <- zy_avail[which(zy_avail > 330)]
+sx <- c(sx[1:2037],sx[2061:length(sx)])
+sy <- c(sy[1:2037],sy[2061:length(sy)])
+
+sx <- c(sx[1:1699],sx[1701:1707],sx[1709:length(sx)])
+sy <- c(sy[1:1699],sy[1701:1707],sy[1709:length(sy)])
+
+lines(sx, sy, col='grey80', pch='.')
+lines(an_x, an_y, col='grey80', pch='.')
+
+a_avail<-lowess_smooth(zx, zy_avail)
+lines(a_avail$x, a_avail$y, col='red', pch='.')
+
+a_avail_m3<-lowess_smooth(an_x, an_y)
+lines(a_avail_m3$x, a_avail_m3$y, col='red', pch='.')
+
+axis(2, las=1)
+
+x_online_node_list <- c(tstamp("2004-6-1"), tstamp("2005-6-1"), tstamp("2006-6-1"), tstamp("2007-11-1"))
+y_online_node_list <- c(330, 480,  500,  550)
+lines(x_online_node_list, y_online_node_list, col='grey80')
+
+#abline_at_date('2005-01-01', 'grey60')
+#abline_at_date('2006-01-01', 'grey60')
+#abline_at_date('2007-01-01', 'grey60')
+#abline_at_date('2008-01-01', 'grey60')
+#abline_at_date('2009-01-01', 'grey60')
+#abline_at_date('2010-01-01', 'grey60')
+
+tstamp_20041112 <-abline_at_date("2004-11-12", col='grey60', lty=2)
+tstamp_20050301 <-abline_at_date("2005-03-01", col='grey60', lty=2)
+tstamp_20050615 <-abline_at_date("2005-06-15", col='grey60', lty=2)
+tstamp_20051023 <-abline_at_date("2005-10-23", col='grey60', lty=2)
+tstamp_20070101 <-abline_at_date("2007-01-01", col='grey60', lty=2)
+tstamp_20070501 <-abline_at_date("2007-05-01", col='grey60', lty=2)
+tstamp_20080601 <-abline_at_date("2008-06-01", col='grey60', lty=2)
+tstamp_20080815 <-abline_at_date("2008-08-15", col='grey60', lty=2)
+tstamp_20090501 <-abline_at_date("2009-05-01", col='grey60', lty=2)
+tstamp_20100201 <-abline_at_date("2010-02-01", col='white', lty=2)
+
+
+text(x=c( tstamp_20041112+(tstamp_20050301-tstamp_20041112)/2,
+        tstamp_20050301+(tstamp_20050615-tstamp_20050301)/2,
+        tstamp_20050615+(tstamp_20051023-tstamp_20050615)/2,
+        tstamp_20051023+(tstamp_20070101-tstamp_20051023)/2,
+        tstamp_20070101+(tstamp_20070501-tstamp_20070101)/2,
+        tstamp_20080601+(tstamp_20080815-tstamp_20080601)/2,
+        tstamp_20090501+(tstamp_20100201-tstamp_20090501)/2 ),
+     y=c(700),
+     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')) 
+
+
+l<-length(ot[3,])
+#axis(1, labels=ot[3,l/2:l], at=ot[1,l/2:l], cex.axis=0.7)
+#axis(2, las=1)
+#mtext("2004           2005           2006           2007           2008           2009", 1,2)
+
+uptime_nodes_m3 <- function (uh, from, to)
+{
+    # build the sequence of daily breaks between 'from' and 'to'
+    dates <-seq(as.Date(from), as.Date(to), 'day')
+    months <- format(dates, '%b')
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+    date_index <- NULL;
+    q_list <- NULL;
+
+    print(length(hbreaks))
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        print (sprintf("round %s of %s", i, length(hbreaks)-1))
+        # get range from uh
+        print (sprintf("ts %s ", hbreaks[i] ))
+        uh_sub <- uh[which(uh$date > hbreaks[i] & uh$date <= hbreaks[i+1] ),]
+        if ( length(uh_sub$uptime ) <= 1 )  { next }
+
+        d<- uh_sub$uptime
+
+        print (sprintf("min: %s, median: %s, max: %s", min(d), median(d), max(d)))
+
+        print (sprintf("length: %s", length(d)))
+        q<-quantile(d)
+        print(q)
+
+        date_index <- c(date_index, i)
+
+        xx<- c(xx, hbreaks[i])
+        q_list <- rbind(q_list, q)
+
+    }
+    m<- months[date_index]
+    return (cbind(xx,q_list, m))
+    # 
+
+}
+
+uh <- read.csv('node_uptime_history.csv', header=TRUE, sep=',')
+
+
+dm <- uptime_nodes_m3(uh, "2009-06-10", "2010-02-28")
+
+par(mai=c(1,1,0.1,0.1))
+    plot(dm[,1], as.numeric(dm[,5])/(60*60*24), type='l', lty=1, xlab="",
+            ylim=c(min(as.numeric(dm[,2])/(60*60*24)),max(as.numeric(dm[,5])/(60*60*24))), xlim=c(min(x1[length(x1)/2]), max(x1)), axes=F, ylab="Uptime (days)", col='orange')
+    lines(dm[,1], as.numeric(dm[,4])/(60*60*24), lty=1, col='red')
+    lines(dm[,1], as.numeric(dm[,3])/(60*60*24), lty=1, col='black')
+    lines(dm[,1], as.numeric(dm[,6])/(60*60*24), lty=1, col='orange')
+    lines(dm[,1], as.numeric(dm[,2])/(60*60*24), lty=1, col='blue')
+    #axis(1, labels=dm[,7], at=dm[,1])
+    #axis(2, las=1)
+    #m<-round(max(as.numeric(dm[,4])/(60*60*24)), 2)
+    #axis(2, labels=m, at=m, las=1)
+    #abline(h=m, lty=2, col='grey40')
+
+l<-length(ot[3,])
+l2<-l/2
+axis(1, labels=ot[3,l2:l], at=ot[1,l2:l], cex.axis=0.7)
+axis(2, las=1)
+mtext("2007                                    2008                                    2009", 1,2)
+
+tstamp_20041112 <-abline_at_date("2004-11-12", col='grey60', lty=2)
+tstamp_20050301 <-abline_at_date("2005-03-01", col='grey60', lty=2)
+tstamp_20050615 <-abline_at_date("2005-06-15", col='grey60', lty=2)
+tstamp_20051023 <-abline_at_date("2005-10-23", col='grey60', lty=2)
+tstamp_20070101 <-abline_at_date("2007-01-01", col='grey60', lty=2)
+tstamp_20070501 <-abline_at_date("2007-05-01", col='grey60', lty=2)
+tstamp_20080601 <-abline_at_date("2008-06-01", col='grey60', lty=2)
+tstamp_20080815 <-abline_at_date("2008-08-15", col='grey60', lty=2)
+tstamp_20090501 <-abline_at_date("2009-05-01", col='grey60', lty=2)
+tstamp_20100201 <-abline_at_date("2010-02-01", col='white', lty=2)
+
+
+text(x=c( tstamp_20041112+(tstamp_20050301-tstamp_20041112)/2,
+        tstamp_20050301+(tstamp_20050615-tstamp_20050301)/2,
+        tstamp_20050615+(tstamp_20051023-tstamp_20050615)/2,
+        tstamp_20051023+(tstamp_20070101-tstamp_20051023)/2,
+        tstamp_20070101+(tstamp_20070501-tstamp_20070101)/2,
+        tstamp_20080601+(tstamp_20080815-tstamp_20080601)/2,
+        tstamp_20090501+(tstamp_20100201-tstamp_20090501)/2 ),
+     y=c(120),
+     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')) 
+
+end_image()
diff --git a/statistics/node_history_all.r b/statistics/node_history_all.r
new file mode 100644 (file)
index 0000000..4180a37
--- /dev/null
@@ -0,0 +1,50 @@
+source("functions.r");
+
+# system("./extract_all.py 2007-* > ../findbad_raw_2007.csv")
+# system("./extract_all.py 2008-* > ../findbad_raw_2008.csv")
+# system("./extract_all.py 2009-* > ../findbad_raw_2009.csv")
+
+fb7 <- read.csv('findbad_raw_2007.csv', sep=',', header=TRUE)
+fb8 <- read.csv('findbad_raw_2008.csv', sep=',', header=TRUE)
+fb9 <- read.csv('findbad_raw_2009.csv', sep=',', header=TRUE)
+
+online_nodes <- function (fb)
+{
+    breaks <- unique(fb$timestamp)
+    n<-NULL
+    o<-NULL
+    x<-NULL
+    for (i in seq(1,length(breaks)) )
+    {
+        ts <- breaks[i]
+        sub <- fb[which(fb$timestamp == ts),]
+        node_count   <- length(unique(sub$hostname))
+        online_count <- length(unique(sub$hostname[which(sub$state=='BOOT')]))
+        x<-c(x,ts)
+        n<-c(n,node_count)
+        o<-c(o,online_count)
+    }
+    print(length(x))
+    print(length(n))
+    print(length(o))
+    return (rbind(x,n,o))
+}
+z7<- online_nodes(fb7)
+z8<- online_nodes(fb8)
+z9<- online_nodes(fb9)
+
+plot(c(z7[1,],z8[1,],z9[1,]), log(c(z7[2,], z8[2,],z9[2,])), 
+        ylim=c(0,7), xlim=c(min(x1), max(x1)), type='p', pch='.', axes=F)
+points(c(z7[1,],z8[1,],z9[1,]) , log(c(z7[3,], z8[3,],z9[3,])), pch='.')
+
+
+
+t_july08 <-unclass(as.POSIXct("2008-07-01", origin="1970-01-01"))[1]
+breaks <- unique(fb8$timestamp[which(fb8$timestamp < t_july08)])
+fb8_boot <- fb8$timestamp[which(fb8$state=="BOOT" & fb8$timestamp < t_july08)]
+h8<-hist(fb8_boot, breaks=breaks[which(!is.na(breaks) & breaks!=0)])
+
+breaks <- unique(as.numeric(as.character(fb9$timestamp)))
+fb9_boot <- as.numeric(as.character(fb9$timestamp[which(fb9$state=="BOOT")]))
+hist(fb9_boot, breaks=breaks[which(!is.na(breaks) & breaks >= 1230775020)])
+
index 75f3c52..0b6b8f3 100644 (file)
@@ -17,12 +17,18 @@ hbreaks<-unclass(as.POSIXct(dates))
 x_start<-unclass(as.POSIXct("2008-05-07", origin="1970-01-01"))[1]
 x_end  <-unclass(as.POSIXct("2009-06-1", origin="1970-01-01"))[1]
 
-start_image("daily-node-count.png")
-plot(dnc2$start[which(!is.na(dnc2$available))], dnc2$registered[which(!is.na(dnc2$available))], 
-    type='l', col='blue', ylim=c(0,900), xlim=c(x_start, x_end),
-    xlab="Date", ylab="Node Count", axes=F)
-lines(dnc2$start[which(!is.na(dnc2$available))], dnc2$available[which(!is.na(dnc2$available))], type='l', col='red', ylim=c(0,900))
-axis(2)
+par(mfrow=c(1,1))
+par(mai=c(.9,.8,.5,.4))
+start_image("daily-node-count.png", height=400)
+#plot(dnc2$start[which(!is.na(dnc2$available))], dnc2$registered[which(!is.na(dnc2$available))], 
+#    type='l', col='blue', ylim=c(0,1000), xlim=c(x_start, x_end),
+#    xlab="Date", ylab="Node Count", axes=F)
+plot(dnc2$start[which(!is.na(dnc2$available))], dnc2$available[which(!is.na(dnc2$available))], 
+    type='l', col='red', ylim=c(0,600), xlim=c(x_start, x_end),
+    xlab="Date", ylab="Online Node Count", axes=F)
+#lines(dnc2$start[which(!is.na(dnc2$available))], dnc2$available[which(!is.na(dnc2$available))], 
+#type='l', col='red', ylim=c(0,1000))
+axis(2, las=1)
 axis(1, labels=months, at=hbreaks)
 
 
@@ -41,18 +47,24 @@ tstamp_0313 <-abline_at_date("2009-03-13", col='grey70')
 text(x=c(tstamp_0610+(tstamp_0815-tstamp_0610)/2,
          tstamp_0815+(tstamp_0905-tstamp_0815)/2,
          tstamp_0924+(tstamp_1015-tstamp_0924)/2, 
-         tstamp_1015+(tstamp_1105-tstamp_1015)/2, 
+         tstamp_1015+(tstamp_1214-tstamp_1015)/2, 
          tstamp_1214+(tstamp_0223-tstamp_1214)/2, 
          tstamp_0223+(tstamp_0313-tstamp_0223)/2), 
      y=c(0),
      labels=c("Kernel bug", 'fix1', 'fix2', 'fix3', 'Notice bug', 'fix4')) #, 'fix 2', 'fix 3', 'fix 4'))
 
-legend(unclass(as.POSIXct("2009-03-13", origin="1970-01-01"))[1], 200,
+mtext("2008                                 2009", 1,2)
+legend(unclass(as.POSIXct("2009-03-13", origin="1970-01-01"))[1], 100,
         cex=0.7,
-        legend=c("Registered", "Available", 'Kernel Update', 'MyOps Event'),
-        pch=c('-', '-', '-', '-'),
-        col=c('blue', 'red', 'grey20', 'grey70'),
-        lty=c(1, 1, 2, 1), merge=T)
+        legend=c("Online", 'Kernel Update', 'MyOps Event'),
+        pch=c('-', '-', '-'),
+        col=c('red', 'grey20', 'grey70'),
+        lty=c(1, 2, 1), merge=T)
+
+        #legend=c("Registered", "Online", 'Kernel Update', 'MyOps Event'),
+        #pch=c('-', '-', '-', '-'),
+        #col=c('blue', 'red', 'grey20', 'grey70'),
+        #lty=c(1, 1, 2, 1), merge=T)
 
 end_image()
 
diff --git a/statistics/node_history_ttr.r b/statistics/node_history_ttr.r
new file mode 100644 (file)
index 0000000..6e7b770
--- /dev/null
@@ -0,0 +1,173 @@
+source("functions.r");
+
+nsh <- read.csv('node_status_history.csv', sep=',', header=TRUE)
+
+# system("./harvest_nodehistory.py > node_status_history_nopcu.csv")
+nsh_nopcu <- read.csv('node_status_history_nopcu.csv', sep=',', header=TRUE)
+
+nsh_m1 <- read.csv('node_status_history_m1.csv', sep=',', header=TRUE)
+# system("stats-m1/harvest_nodehistory_m1.py > ./node_status_history_m1_nopcu.csv")
+nsh_m1_nopcu <- read.csv('node_status_history_m1_nopcu.csv', sep=',', header=TRUE)
+nsh_m1_nopcu_may <- read.csv('node_status_history_m1_nopcu_may08sep08.csv', sep=',', header=TRUE)
+
+# Build a host-by-day 0/1 matrix of downtime and draw it with myImagePlot().
+# Rows are indexed by the t$hostname value (nrow is max(as.numeric(t$hostname)));
+# columns are the breaks of the from..to sequence; a cell set to 1 means "down".
+# 'year' and 'max' are accepted but not used here.  Returns the matrix.
+node_hist_image <- function (t, year, from, to, max=0, type="week", title="")
+{
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, "%b-%d")
+    hbreaks<-unclass(as.POSIXct(dates))
+    ncols <- length(hbreaks)
+
+    image <- matrix(data=0, nrow=max(as.numeric(t$hostname)), ncol=ncols)
+
+    for ( i in seq_len(ncols) )
+    {
+        # find the range : d plus a day
+        d <- hbreaks[i]
+        d_end <- d+60*60*24
+        # find unique hosts in this day range
+        t_sub <- t[which(t$start > d & t$start <= d_end & t$status == 'down'),]
+        unique_hosts <- unique(t_sub$hostname)
+        if (length(unique_hosts) == 0 ) { next }
+
+        host_n_list <- unique_hosts
+        host_s_list <- as.character(unique_hosts)
+
+        for ( hi in seq_along(unique_hosts) )
+        {
+            host_s <- host_s_list[hi]
+            host_n <- host_n_list[hi]
+            # events for this host after d (to avoid already identified events)
+            ev <- t[which(t$hostname == host_s & t$start > d ),]
+            print (sprintf("events length for host %s %s", host_s, length(ev$start)));
+            # get down events for this host
+            down_ev_index_list <- which(ev$status == 'down')
+            for ( e_i in down_ev_index_list )
+            {
+                if ( e_i == length(ev$status) ) { 
+                    # then the node ends down, so fill in the rest with 1.
+                    image[host_n, i:ncols] <- 1
+                } else {
+                    # then there is a subsequent 'good' event
+                    good_ev <- ev[e_i+1,]
+                    dbreaks <- seq(d,good_ev$start+60*60*24,60*60*24)
+                    # for every index for time d, to good_ev$start
+                    l<-length(dbreaks)
+                    print (sprintf("length %s",l));
+                    # clamp: a recovery after 'to' must not index past the last column
+                    image[host_n, i:min(i+l, ncols)] <- 1
+                }
+            }
+        }
+    }
+    myImagePlot(image, xLabels=months, yLabels=c(""), title=title)
+    return (image);
+}
+
+
+
+node_hist_dist <- function (t, year, from, to, max=0, type="week", title="")
+{
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, "%b-%d")
+    hbreaks<-unclass(as.POSIXct(dates))
+    current_ts <- unclass(as.POSIXct(Sys.Date()))
+
+    dist <- NULL
+
+    unique_hosts <- unique(t$hostname)
+    host_n_list <- unique_hosts
+    host_s_list <- as.character(unique_hosts)
+
+    down <- 0
+
+    for ( hi in seq(1, length(unique_hosts))  ) 
+    {
+        host_s <- host_s_list[hi]
+        host_n <- host_n_list[hi]
+        # all events for this host (no date filter is applied here)
+        ev <- t[which( t$hostname == host_s ),]
+        print (sprintf("events length for host %s %s", host_s, length(ev$start)));
+        # get down events for this host
+        down_ev_index_list <- which(ev$status == 'down')
+        for ( e_i in down_ev_index_list )
+        {
+            # when equal, there is no resolution so leave it as down
+            if ( e_i != length(ev$status) ) { 
+                good_ev <- ev[e_i+1,]
+                down_ev <- ev[e_i,]
+                dist <- c(dist, good_ev$start - down_ev$start)
+            } else if ( e_i == length(ev$status) && length(ev$status) == 1) { 
+                print (sprintf("DOWN FOREVER! %s", length(ev$start) ))
+                down <- down + 1
+                dist <- c(dist, 10*current_ts - ev$start)
+            }
+        }
+    }
+    print(down);
+    return (dist);
+}
+
+nsh_image <- node_hist_image(nsh, '2009', '2009-06-01', '2010-02-28', 0, 'day')
+nsh_image_m1 <- node_hist_image(nsh_m1, '2009', '2008-10-01', '2009-03-28', 0, 'day')
+
+
+nsh_short <- nsh[which(nsh$start > unclass(as.POSIXct("2009-06-01", origin="1970-01-01"))[1]),]
+nsh_short <- nsh_short[which(nsh_short$start < unclass(as.POSIXct("2009-10-31", origin="1970-01-01"))[1]),]
+
+
+nsh_short <- nsh_nopcu
+nsh_dist <- node_hist_dist(nsh_short, '2009', '2009-06-01', '2010-02-28', 0, 'day')
+d<- ecdf(nsh_dist/(60*60*24))
+
+#nsh_m1_short <- nsh_m1[which(nsh_m1$start > unclass(as.POSIXct("2008-10-01", origin="1970-01-01"))[1]),]
+nsh_m1_short <- nsh_m1_nopcu
+# NOTE: something happened between 10-2 and 10-3
+t_1015 <- unclass(as.POSIXct("2008-10-15", origin="1970-01-01"))[1]
+t_0224 <- unclass(as.POSIXct("2009-02-24", origin="1970-01-01"))[1]
+nsh_m1_short <- nsh_m1_nopcu[which(nsh_m1_nopcu$start > t_1015 & nsh_m1_nopcu$start <= t_0224),]
+
+nsh_m1_short <- nsh_m1_nopcu
+nsh_dist_m1 <- node_hist_dist(nsh_m1_short, '2008', '2008-10-01', '2009-03-22', 0, 'day')
+d_m1<- ecdf(nsh_dist_m1/(60*60*24))
+
+
+t_0530 <- unclass(as.POSIXct("2008-05-30", origin="1970-01-01"))[1]
+t_0815 <- unclass(as.POSIXct("2008-08-15", origin="1970-01-01"))[1]
+nsh_m1_short <- nsh_m1_nopcu_may[which(nsh_m1_nopcu_may$start > t_0530 & nsh_m1_nopcu_may$start <= t_0815),]
+nsh_dist_m1 <- node_hist_dist(nsh_m1_short, '2008', '2008-05-10', '2008-08-15', 0, 'day')
+d_m1_may <- ecdf(nsh_dist_m1/(60*60*24))
+
+
+# d<-ecdf(nsh_dist[which(nsh_dist/(60*60*24) < 90 )]/(60*60*24)), 
+# 180 ~= 6 months.
+par(mfrow=c(1,1))
+par(mai=c(.9,.9,.1,.1))
+start_image("node_history_ttr_nopcu.png")
+plot(d, xlim=c(0,180), ylim=c(0,1), axes=F, xlab="Days to Resolve", ylab="Percentile",
+   col.hor='red', col.vert='red', pch='.', col.points='red', main="")
+plot(d_m1, xlim=c(0,180), ylim=c(0,1), xlab="Days to Resolve", ylab="Percentile",
+   col.hor='blue', col.vert='blue', pch='.', col.points='blue', add=TRUE)
+plot(d_m1_may, xlim=c(0,180), ylim=c(0,1), xlab="Days to Resolve", ylab="Percentile",
+   col.hor='green', col.vert='green', pch='.', col.points='green', add=TRUE)
+
+weeks <- c(0,7,14,21,28,60,90,120,150,180)
+axis(1, labels=weeks, at=weeks)
+percentages <- c(0,0.25, 0.5, 0.75, 0.85, 0.95, 1)
+axis(2, las=1, labels=percentages, at=percentages)
+
+abline(v=c(7,14,21,28), col='grey80', lty=2)
+abline(h=c(0.5, 0.6, 0.75, 0.85, 0.95 ), col='grey80', lty=2)
+abline(v=c(91), col='grey80', lty=2)
+
+
+legend(100, 0.1,
+       cex=0.7,
+       legend=c("Typical MyOps -- July2009-Feb2010", "Notice Bug -- Oct2008-Mar2009", "Kernel Bug -- May2008-Sept2008"),
+       pch=c('-', '-', '-'),
+       col=c('red', 'blue', 'green'),
+       lty=c(1, 1, 1), merge=T)
+end_image()
diff --git a/statistics/node_status_jun09feb10.r b/statistics/node_status_jun09feb10.r
new file mode 100644 (file)
index 0000000..2ba5da8
--- /dev/null
@@ -0,0 +1,47 @@
+source("functions.r");
+
+# data collected from M3 fb db 
+# system("./harvest_nodestatus.py  > node-status-jun09-feb10.csv")
+ns <- read.csv('node-status-jun09-feb10.csv', sep=',', header=TRUE)
+
+available_nodes <- function (ns, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # get range from ns
+        ns_sub <- ns[which(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & ns$status == 'BOOT'),]
+        nodes <- length(ns_sub$date)
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, nodes)
+
+    }
+    m<- months[1:length(months)-1]
+    return (rbind(xx,yy,m))
+}
+
+# Daily count of BOOT-status rows between 2009-06-10 and 2010-02-28.
+an <- available_nodes(ns, "2009-06-10", "2010-02-28", 'day')
+
+x_start<-unclass(as.POSIXct("2009-06-10", origin="1970-01-01"))[1]
+x_end  <-unclass(as.POSIXct("2010-02-28", origin="1970-01-01"))[1]
+
+par(mfrow=c(1,1))
+par(mai=c(.9,.8,.5,.4))
+#start_image("daily-node-count.png")
+sx<-an[1,][which(as.numeric(an[2,]) > 100)]
+sy<-an[2,][which(as.numeric(an[2,]) > 100)]
+plot(sx, sy,
+    type='l', col='blue', ylim=c(0,1000), xlim=c(x_start, x_end),
+    xlab="Date", ylab="Node Count", axes=FALSE)
+axis(2, las=1)
+# 'months'/'hbreaks' are local to available_nodes(); use its returned rows.
+axis(1, labels=an[3,], at=as.numeric(an[1,]))
diff --git a/statistics/operator_overhead.r b/statistics/operator_overhead.r
new file mode 100644 (file)
index 0000000..03dc5a0
--- /dev/null
@@ -0,0 +1,189 @@
+source("functions.r");
+
+
+available_nodes <- function (ns, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # get range from ns
+        ns_sub <- ns[which(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & ns$status == 'BOOT'),]
+        nodes <- length(ns_sub$date)
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, nodes)
+
+    }
+    m<- months[1:length(months)-1]
+    return (rbind(xx,yy,m))
+}
+
+
+
+open_tickets <- function (t, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+
+    for ( i in seq(1,length(hbreaks)-1) )
+    {
+        # identify any tickets with a start time in range, lastreply in range
+        # or where both start is less and lastreply is greater than the range
+        t_sub <- t[which( (t$start < hbreaks[i] & t$lastreply > hbreaks[i+1]) | 
+                          (t$start > hbreaks[i] & t$start <= hbreaks[i+1]) | 
+                          (t$lastreply > hbreaks[i] & t$lastreply <= hbreaks[i+1]) ),]
+        tickets <- length(t_sub$start)
+        #if ( nrow(t_sub) > 0 ){
+        #    for ( j in seq(1,nrow(t_sub)) )
+        #    {
+        #        #print(sprintf("id %s, date %s", t_sub[i,'ticket_id'], t_sub[i,'s1']))
+        #        print(sprintf("id %s, date %s", t_sub[j,]$ticket_id, t_sub[j, 's1']))
+        #    }
+        #}
+
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, tickets)
+
+    }
+    m<- months[1:length(months)-1]
+    return (rbind(xx,yy,m))
+}
+
+online_nodes <- function (fb)
+{
+    breaks <- unique(fb$timestamp)
+    n<-NULL
+    o<-NULL
+    x<-NULL
+    for (i in seq(1,length(breaks)) )
+    {
+        ts <- breaks[i]
+        sub <- fb[which(fb$timestamp == ts),]
+        node_count   <- length(unique(sub$hostname))
+        online_count <- length(unique(sub$hostname[which(sub$state=='BOOT')]))
+        x<-c(x,ts)
+        n<-c(n,node_count)
+        o<-c(o,online_count)
+    }
+    print(length(x))
+    print(length(n))
+    print(length(o))
+    return (rbind(x,n,o))
+}
+
+lowess_smooth <- function (x, y, delta=(60*60*24), f=0.02)
+{
+    a<-lowess(x, y, delta=delta, f=f)
+    return (a);
+}
+
+#####
+
+# system("parse_rt_data.py 3 > rt_data.csv");
+t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
+t2 <- t[which(t$complete == 1),]
+ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'day', "%b")
+
+start_image("rt_operator_overhead.png")
+par(mfrow=c(2,1))
+par(mai=c(0,1,0.1,0.1))
+
+x1<-as.numeric(ot[1,])
+y1<-as.numeric(ot[2,])
+
+a_ot<-lowess_smooth(x1, y1)
+
+plot(x1, y1, col='grey80', type='l', axes=F, 
+    ylab="Open Tickets (tickets/day)", xlab="Date",
+    ylim=c(0,120)) # , ylim=c(0,260))
+lines(a_ot$x, round(a_ot$y), col='black')
+
+#axis(1, labels=ot[3,], at=ot[1,], cex.axis=0.7)
+axis(2, las=1)
+#mtext("2004           2005           2006           2007           2008           2009", 1,2)
+
+#abline_at_date('2005-01-01', 'grey60')
+#abline_at_date('2006-01-01', 'grey60')
+#abline_at_date('2007-01-01', 'grey60')
+#abline_at_date('2008-01-01', 'grey60')
+#abline_at_date('2009-01-01', 'grey60')
+#abline_at_date('2010-01-01', 'grey60')
+abline(h=25, lty=2, col='grey80')
+abline(h=40, lty=2, col='grey80')
+
+tstamp_20041112 <-abline_at_date("2004-11-12", col='grey60', lty=2)
+tstamp_20050301 <-abline_at_date("2005-03-01", col='grey60', lty=2)
+tstamp_20050615 <-abline_at_date("2005-06-15", col='grey60', lty=2)
+tstamp_20051023 <-abline_at_date("2005-10-23", col='grey60', lty=2)
+tstamp_20070101 <-abline_at_date("2007-01-01", col='grey60', lty=2)
+tstamp_20070501 <-abline_at_date("2007-05-01", col='grey60', lty=2)
+tstamp_20080601 <-abline_at_date("2008-06-01", col='grey60', lty=2)
+tstamp_20080815 <-abline_at_date("2008-08-15", col='grey60', lty=2)
+tstamp_20090501 <-abline_at_date("2009-05-01", col='grey60', lty=2)
+tstamp_20100201 <-abline_at_date("2010-02-01", col='white', lty=2)
+
+
+text(x=c( tstamp_20041112+(tstamp_20050301-tstamp_20041112)/2,
+        tstamp_20050301+(tstamp_20050615-tstamp_20050301)/2,
+        tstamp_20050615+(tstamp_20051023-tstamp_20050615)/2,
+        tstamp_20051023+(tstamp_20070101-tstamp_20051023)/2,
+        tstamp_20070101+(tstamp_20070501-tstamp_20070101)/2,
+        tstamp_20080601+(tstamp_20080815-tstamp_20080601)/2,
+        tstamp_20090501+(tstamp_20100201-tstamp_20090501)/2 ),
+     y=c(120),
+     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')) 
+
+par(mai=c(1,1,0.1,0.1))
+for ( s in c(7) ) 
+{
+    d<- median_time_to_resolve_window(t2, "2004/1/1", "2010/2/28", s, "%b")
+    plot(d[,1], exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
+            axes=F, ylim=c(0.01, 15), ylab="Resolution Time by", col='grey50',
+            xlim=c(min(x1), max(x1)))
+    mtext("Quartile (days)", 2, 2)
+    lines(d[,1], exp(as.numeric(d[,4]))/24, lty=1, col='black')
+    lines(d[,1], exp(as.numeric(d[,3]))/24, lty=1, col='grey50')
+    #axis(1, labels=d[,7], at=d[,1])
+    axis(1, labels=ot[3,], at=ot[1,], cex.axis=0.7)
+    mtext("2004           2005           2006           2007           2008           2009", 1,2)
+    axis(2, las=1)
+    m<-round(max(exp(as.numeric(d[,4]))/24), 2)
+    axis(2, labels=m, at=m, las=1)
+    abline(h=m, lty=2, col='grey40')
+}
+
+tstamp_20041112 <-abline_at_date("2004-11-12", col='grey60', lty=2)
+tstamp_20050301 <-abline_at_date("2005-03-01", col='grey60', lty=2)
+tstamp_20050615 <-abline_at_date("2005-06-15", col='grey60', lty=2)
+tstamp_20051023 <-abline_at_date("2005-10-23", col='grey60', lty=2)
+tstamp_20070101 <-abline_at_date("2007-01-01", col='grey60', lty=2)
+tstamp_20070501 <-abline_at_date("2007-05-01", col='grey60', lty=2)
+tstamp_20080601 <-abline_at_date("2008-06-01", col='grey60', lty=2)
+tstamp_20080815 <-abline_at_date("2008-08-15", col='grey60', lty=2)
+tstamp_20090501 <-abline_at_date("2009-05-01", col='grey60', lty=2)
+tstamp_20100201 <-abline_at_date("2010-02-01", col='white', lty=2)
+
+
+text(x=c( tstamp_20041112+(tstamp_20050301-tstamp_20041112)/2,
+        tstamp_20050301+(tstamp_20050615-tstamp_20050301)/2,
+        tstamp_20050615+(tstamp_20051023-tstamp_20050615)/2,
+        tstamp_20051023+(tstamp_20070101-tstamp_20051023)/2,
+        tstamp_20070101+(tstamp_20070501-tstamp_20070101)/2,
+        tstamp_20080601+(tstamp_20080815-tstamp_20080601)/2,
+        tstamp_20090501+(tstamp_20100201-tstamp_20090501)/2 ),
+     y=c(15),
+     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')) 
+
+end_image()
index 9ba227b..0c32b3b 100644 (file)
@@ -39,7 +39,6 @@ d <- (t2$lastreply - t2$start)/(60*60)
 #h<-hist(log(d2), plot=F, breaks=50)
 #lines(h$breaks[which(h$counts!=0)], h$counts[which(h$counts!=0)])
 
-
 # this doesn't work as I would like.  I think the bins aren't as I expect
 #h <- hist(d, plot=F, breaks=c(seq(0,max(d)+1, .1)))
 #plot(h$counts, log="x", pch=20, col="blue",
@@ -49,12 +48,12 @@ d <- (t2$lastreply - t2$start)/(60*60)
 #plot(log(d2))
 #plot(ecdf(d2))
 
-tstamp_45 <-unclass(as.POSIXct("2005-01-01", origin="1960-01-01"))[1]
-tstamp_56 <-unclass(as.POSIXct("2006-01-01", origin="1960-01-01"))[1]
-tstamp_67 <-unclass(as.POSIXct("2007-01-01", origin="1960-01-01"))[1]
-tstamp_78 <-unclass(as.POSIXct("2008-01-01", origin="1960-01-01"))[1]
-tstamp_89 <-unclass(as.POSIXct("2009-01-01", origin="1960-01-01"))[1]
-tstamp_90 <-unclass(as.POSIXct("2010-01-01", origin="1960-01-01"))[1]
+tstamp_45 <-unclass(as.POSIXct("2005-01-01", origin="1970-01-01"))[1]
+tstamp_56 <-unclass(as.POSIXct("2006-01-01", origin="1970-01-01"))[1]
+tstamp_67 <-unclass(as.POSIXct("2007-01-01", origin="1970-01-01"))[1]
+tstamp_78 <-unclass(as.POSIXct("2008-01-01", origin="1970-01-01"))[1]
+tstamp_89 <-unclass(as.POSIXct("2009-01-01", origin="1970-01-01"))[1]
+tstamp_90 <-unclass(as.POSIXct("2010-01-01", origin="1970-01-01"))[1]
 
 
 t_4 <- t2[which( t2$start <  tstamp_45 ),]
@@ -87,6 +86,81 @@ year_hist(t_8, "2008", "2007/12/30", "2009/1/7", 85)
 year_hist(t_9, "2009", "2008/12/28", "2010/1/30", 85)
 end_image()
 
+h4<-year_hist(t_4, "2004", "2003/12/28", "2005/2/7", 0, type='month', fmt="%b")
+h5<-year_hist(t_5, "2005", "2005/1/2", "2006/2/7", 0, type='month', fmt="%b")
+h6<-year_hist(t_6, "2006", "2006/1/1", "2007/2/7", 0, type='month', fmt="%b")
+h7<-year_hist(t_7, "2007", "2006/12/31", "2008/2/7", 0, type='month', fmt="%b")
+h8<-year_hist(t_8, "2008", "2007/12/30", "2009/2/7", 0, type='month', fmt="%b")
+h9<-year_hist(t_9, "2009", "2008/12/28", "2010/1/30", 0, type='month', fmt="%b")
+
+hall<-year_hist(t2, "200x", "2004/1/1", "2010/3/28", 0, type='month', fmt="%b")
+
+# Scatter the bin counts of histogram `hall` against bin number: bins with a
+# count above `d` are drawn red, bins below `d` blue (a count equal to `d` is
+# not drawn).  The x axis is labelled with the dates from..to (step `type`)
+# formatted with `fmt`, and a vertical line is drawn every 12th bin from 13.
+threshold <- function (hall, d, from, to, type, fmt="%b")
+{
+    months <- format(seq(as.Date(from), as.Date(to), type), fmt)
+
+    x <- seq(1, length(hall$breaks))
+    above <- which(hall$counts > d)
+    below <- which(hall$counts < d)
+
+    # keep the names a_x / a_y: plot() uses them as the default axis titles
+    a_x <- x[above]
+    a_y <- hall$counts[above]
+    plot(a_x, a_y, type='p', col='red', ylim=c(0,260), xlim=c(0,81), axes=FALSE)
+    points(x[below], hall$counts[below], type='p', col='blue', ylim=c(0,260), xlim=c(0,81))
+    axis(1, labels=months, at=x)
+    axis(2)
+    abline(v=seq(13,length(months),12))
+}
+
+# Fold the monthly counts in hall$counts into a matrix yy with one row per
+# year and one column per month, then sum each month over all years (y2).
+years <- 7
+b<- seq(1,years*12,12)      # position of each year's first month in hall$counts
+yy<-NULL
+for (i in seq(1,years) )
+{
+    if ( i+1 > length(b) ) {
+        # last year: take whatever months are left; rbind() recycles the short
+        # row to the matrix width and the filler is zeroed just below
+        idx <- b[i]:length(hall$counts)
+    } else {
+        # the parentheses matter: b[i]:b[i+1]-1 parses as (b[i]:b[i+1])-1,
+        # which starts every row after the first one month too early
+        idx <- b[i]:(b[i+1]-1)
+    }
+    yy<- rbind(yy,hall$counts[idx])
+}
+yy[7,3:12]<-0   # no data for beyond feb.
+y2<-NULL ; for ( i in seq(1,12) ) { y2<-c(y2,sum(yy[,i])) }
+
+start_image('rt_aggregate_months.png', width=600, height=300)
+barplot(y2, space=.1, width=.9, col=c('blue','red', 'red', 'red', 'red', 
+    'blue', 'blue', 'red', 'red', 'red', 'blue', 'blue'),
+    xlab="Months", ylab="Sum of Tickets over 6 years")
+axis(1, labels=c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
+    'Sep', 'Oct', 'Nov', 'Dec'), at=c(0,1,2,3,4,5,6,7,8,9,10,11)+.5)
+end_image()
+
+# One colour per cell of yy, walking the matrix row by row: blue for cells
+# below 80, red otherwise.
+cc <- ifelse(as.vector(t(yy)) < 80, 'blue', 'red')
+barplot(yy, col=cc)
+
+# Same per-month totals as above, but leaving out row 4 of yy (2007).
+start_image('rt_aggregate_months_no2007.png', width=600, height=300)
+y3<-NULL ; for ( i in seq(1,12) ) { y3<-c(y3,sum(yy[1:3,i], yy[5:7,i])) }
+# (the stray empty positional argument after y3 has been removed)
+barplot(y3, space=.1, width=.9, col=c('blue','blue', 'red', 'red', 'red', 
+    'blue', 'blue', 'red', 'red', 'red', 'blue', 'blue'),
+    xlab="Months", ylab="Sum of Tickets over 6 years")
+axis(1, labels=c('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
+    'Sep', 'Oct', 'Nov', 'Dec'), at=c(0,1,2,3,4,5,6,7,8,9,10,11)+.5)
+end_image()
+
+
+
+
 par(mai=c(0.7,0.7,0.7,0.7))
 par(mfrow=c(1,1))
 
@@ -118,8 +192,245 @@ time_hist <- function (t, lessthan, year, log=T, breaks=30, xlim=c(-4,10), ylim=
     }
     return (h);
 }
+
+# Sliding-window quantiles of log(hours to resolve).
+#   t       data frame with numeric columns `start` and `lastreply` (seconds)
+#   from,to date range; it is cut into weekly breaks
+#   window  window width, in weekly breaks
+#   fmt     format() string for the label column
+# Windows holding fewer than two tickets are skipped.  Returns a character
+# matrix with one row per kept window: `xx` (break at the window centre), the
+# five quantile() columns of log(hours), and `m` (label of the centre break).
+median_time_to_resolve_window <- function (t, from, to, window, fmt="%b")
+{
+    week_starts <- seq(as.Date(from), as.Date(to), 'week')
+    week_labels <- format(week_starts, fmt)
+    hbreaks <- unclass(as.POSIXct(week_starts))
+    rounds <- length(hbreaks)-window-1
+
+    xx <- NULL
+    q_list <- NULL
+    centre_index <- NULL
+
+    for ( i in seq(1,rounds) )
+    {
+        print (sprintf("round %s of %s", i, rounds))
+
+        # tickets opened inside (hbreaks[i], hbreaks[i+window]]
+        in_window <- which(t$start > hbreaks[i] & t$start<= hbreaks[i+window])
+        t_sub <- t[in_window,]
+        if ( length(t_sub$start) <= 1 )  { next }
+
+        # resolution time in hours, on a log scale
+        d <- log((t_sub$lastreply - t_sub$start)/(60*60))
+        print (sprintf("length: %s", length(d)))
+        q <- quantile(d)
+        print(q)
+
+        centre <- round(i+window/2)
+        centre_index <- c(centre_index, centre)
+        xx <- c(xx, hbreaks[centre])
+        q_list <- rbind(q_list, q)
+    }
+    m <- week_labels[centre_index]
+    return (cbind(xx,q_list, m))
+}
+# Sliding-window mean of log(hours to resolve), modelled as a normal density.
+#   t       data frame with numeric columns `start` and `lastreply` (seconds)
+#   from,to date range; it is cut into weekly breaks
+#   window  window width, in weekly breaks
+#   fmt     format() string for the label row
+# Windows with fewer than two tickets, or whose mean/sd is not finite, are
+# skipped.  Returns a character matrix with one COLUMN per kept window and
+# rows xx (break at the window centre), yy (grid point where the fitted
+# density peaks), yy_sd_high / yy_sd_low (peak +/- one sd) and m (label of the
+# window's first break); NULL when no window qualifies.
+mean_time_to_resolve_window <- function (t, from, to, window, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), 'week')
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+    yy_sd_high<-NULL;
+    yy_sd_low<-NULL;
+    m<-NULL;
+
+    x<-seq(-20,20,0.01)
+    rounds <- length(hbreaks)-window-1
+    for ( i in seq_len(max(rounds, 0)) )
+    {
+        print (sprintf("round %s of %s", i, rounds))
+        # get range from t
+        t_sub <- t[which(t$start > hbreaks[i] & t$start<= hbreaks[i+window]),]
+        if ( nrow(t_sub) <= 1 )  { next }
+        d <- (t_sub$lastreply - t_sub$start)/(60*60)    # hours
+        d <- log(d)                                     # log(hours)
+        print (sprintf("length: %s", length(d)))
+        avg<-mean(d)
+        s<-sd(d)
+        # e.g. a zero-length ticket gives log(0) = -Inf; nothing to plot then
+        if ( !is.finite(avg) || !is.finite(s) ) { next }
+
+        # normality check is diagnostic only: shapiro.test() stops on samples
+        # outside 3..5000 values or with all values identical, so don't let
+        # it abort the whole run
+        r<-tryCatch(shapiro.test(d), error=function (e) NULL)
+        if ( !is.null(r) && r$statistic < 0.9 ){
+            print (r);
+        }
+
+        dens<-dnorm(x, mean=avg, sd=s)
+        print(avg)
+        # grid point with the highest density; which.max() yields exactly one
+        # index even when two grid points tie
+        y_peak <- x[which.max(dens)]
+        print(y_peak)
+
+        # append (not overwrite) so every row has one entry per kept window
+        xx<- c(xx, hbreaks[round(i+window/2)])
+        yy<- c(yy, y_peak)
+        yy_sd_high<- c(yy_sd_high, y_peak + s)
+        yy_sd_low <- c(yy_sd_low, y_peak - s)
+        # label collected here so it stays aligned when windows are skipped
+        m<- c(m, months[i])
+    }
+    return (rbind(xx,yy,yy_sd_high, yy_sd_low, m))
+}
+require(sn)
+# Sliding-window peak of a skew-normal fit to log(hours to resolve).
+# Relies on sn.em(), dsn() and cp.to.dp() from the sn package loaded above.
+#   t       data frame with numeric columns `start` and `lastreply` (seconds)
+#   from,to date range; it is cut into weekly breaks
+#   window  window width, in weekly breaks
+#   fmt     format() string for the label row
+# Windows with fewer than two tickets, or whose fitted skewness exceeds 0.95
+# in absolute value, are skipped.  Returns a character matrix with one COLUMN
+# per kept window and rows xx (break at the window centre), yy (grid point
+# where the fitted density peaks), yy_sd_high / yy_sd_low (peak +/- the
+# fitted 's.d.') and m (label of the window's first break); NULL when no
+# window qualifies.
+sknorm_time_to_resolve_window <- function (t, from, to, window, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), 'week')
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+    yy_sd_high<-NULL;
+    yy_sd_low<-NULL;
+    m<-NULL;
+
+    x<-seq(-20,20,0.01)
+    rounds <- length(hbreaks)-window-1
+    for ( i in seq_len(max(rounds, 0)) )
+    {
+        print (sprintf("round %s of %s", i, rounds))
+        # get range from t
+        t_sub <- t[which(t$start > hbreaks[i] & t$start<= hbreaks[i+window]),]
+        if ( nrow(t_sub) <= 1 )  { next }
+        d <- (t_sub$lastreply - t_sub$start)/(60*60)    # hours
+        d <- log(d)                                     # log(hours)
+        print (sprintf("length: %s", length(d)))
+        h<-sn.em(y=d)
+        if ( abs(h$cp['skewness']) > 0.95 )
+        {
+            print(h)
+            next    # just skip it
+        }
+
+        # evaluate the fitted density on the grid and take its peak;
+        # which.max() yields exactly one index even on a tie
+        dens<-dsn(x, dp=cp.to.dp(h$cp))
+        y_peak <- x[which.max(dens)]
+        s <- unname(h$cp['s.d.'])
+
+        # append (not overwrite) so every row has one entry per kept window
+        xx<- c(xx, hbreaks[round(i+window/2)])
+        yy<- c(yy, y_peak)
+        yy_sd_high<- c(yy_sd_high, y_peak + s)
+        yy_sd_low <- c(yy_sd_low, y_peak - s)
+        # label collected here so it stays aligned when windows are skipped
+        m<- c(m, months[i])
+    }
+    return (rbind(xx,yy,yy_sd_high, yy_sd_low, m))
+}
+
+# NOTE: Try something simpler, like median of the log of ttr.
+#       it's going to be a lot of work to explain lsn distributions.  something
+#       more obvious would be a lot easier.
+
+par(mfrow=c(4,1))
+par(mai=c(.3,0.3,0.3,0.3))
+for ( s in c(7)) #,14,21) )
+{
+    d<- median_time_to_resolve_window(t2, "2004/1/1", "2010/2/28", s, "%b%y")
+    plot(d[,1], exp(as.numeric(d[,5]))/24, type='l', lty=1, xlab="",
+            axes=F, ylim=c(0.01, 15), ylab="Days to Resolve", col='orange')
+    lines(d[,1], exp(as.numeric(d[,4]))/24, lty=1, col='red')
+    lines(d[,1], exp(as.numeric(d[,3]))/24, lty=1, col='black')
+    axis(1, labels=d[,7], at=d[,1])
+    axis(2, las=1)
+    m<-round(max(exp(as.numeric(d[,4]))/24), 2)
+    axis(2, labels=m, at=m, las=1)
+    abline(h=m, lty=2, col='grey40')
+}
+
+# monitor
+    d2<- median_time_to_resolve_window(m2, "2007/02/1", "2010/2/28", s, "%b%y")
+    plot(d[,1], exp(as.numeric(d[,2]))/24, type='l', lty=1, xlab="",
+            axes=F, ylim=c(0.01, 165), ylab="Days to Resolve", col='white')
+    lines(d2[,1], exp(as.numeric(d2[,5]))/24, lty=1, col='red')
+    lines(d2[,1], exp(as.numeric(d2[,4]))/24, lty=1, col='red')
+    lines(d2[,1], exp(as.numeric(d2[,3]))/24, lty=1, col='black')
+    axis(1, labels=d[,7], at=d[,1])
+    axis(2, las=1)
+    m<-round(max(exp(as.numeric(d2[,4]))/24), 2)
+    axis(2, labels=m, at=m, las=1)
+    abline(h=m, lty=2, col='grey40')
+
+
+
+# Per-period peak of a skew-normal fit to log(hours to resolve).
+# Relies on sn.em(), dsn() and cp.to.dp() from the sn package.
+#   t       data frame with numeric columns `start` and `lastreply` (seconds)
+#   from,to date range
+#   type    step passed to seq() on the dates (e.g. "month", "2 weeks")
+#   fmt     format() string for the label row
+# Periods without tickets are skipped.  Returns a character matrix with one
+# COLUMN per kept period and rows xx (period start), yy (grid point where the
+# fitted density peaks), yy_sd_high / yy_sd_low (peak +/- the fitted 's.d.')
+# and m (label of the period start); NULL when every period is empty.
+mean_time_to_resolve <- function (t, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+    yy_sd_high<-NULL;
+    yy_sd_low<-NULL;
+    m<-NULL;
+
+    x<-seq(-8,10,0.01)      # grid the fitted density is evaluated on
+    for ( i in seq_len(length(hbreaks)-1) )
+    {
+        # get range from t
+        t_sub <- t[which(t$start > hbreaks[i] & t$start<= hbreaks[i+1]),]
+        if ( nrow(t_sub) == 0 )  { next }
+        d <- (t_sub$lastreply - t_sub$start)/(60*60)    # hours
+        d <- log(d)                                     # log(hours)
+        h<-sn.em(y=d)
+        if ( abs(h$cp['skewness']) > 0.95 )
+        {
+            print(h)    # report strongly skewed fits, but keep them
+        }
+
+        # peak of the fitted density; which.max() yields exactly one index
+        dens<-dsn(x, dp=cp.to.dp(h$cp))
+        y_peak <- x[which.max(dens)]
+        s <- unname(h$cp['s.d.'])
+
+        # append (not overwrite) so every row has one entry per kept period
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, y_peak)
+        yy_sd_high<- c(yy_sd_high, y_peak + s)
+        yy_sd_low <- c(yy_sd_low, y_peak - s)
+        # label collected here so it stays aligned when periods are skipped
+        m<- c(m, months[i])
+    }
+    return (rbind(xx,yy,yy_sd_high, yy_sd_low, m))
+}
+
+
+par(mfrow=c(5,1))
+par(mai=c(.3,0.3,0.3,0.3))
+for ( s in c("10 days", "2 weeks", "3 weeks", "month", "2 months"))
+#for ( s in c("month") )
+{
+    d<- mean_time_to_resolve(t2, "2004/1/1", "2010/2/28", s, "%b%y")
+    plot(d[1,], exp(as.numeric(d[2,]))/24, type='l', axes=F)
+    points(d[1,], exp(as.numeric(d[2,]))/24, pch=23) 
+    axis(1, labels=d[5,], at=d[1,])
+    axis(2)
+}
+
+
+
 tstamp <-unclass(as.POSIXct("2007-05-01", origin="1960-01-01"))
-t_7a <- t_7[which(t_7$start < tstamp),]
+# t_7a: rows of t_7 that start before tstamp (2007-05-01); t_7b takes the rest
+t_7a <- t_7[which(t_7$start < tstamp),]
 t_7b <- t_7[which(t_7$start >= tstamp),]
 
 #end_image()
@@ -222,6 +533,7 @@ whisker2 <- function (x0,y0, y0_high, y0_low, col="black", length=0.05)
     arrows(x0, y0, x0, y0_low, code=2, angle=90, length=length, col=col)
 }
 
+# NOTE: ** monthly averages might make a more compelling case than annual averages.
 start_image("rt_aggregate_times.png")
 par(mfrow=c(1,1))
 par(mai=c(1,1,1,1))
@@ -297,10 +609,10 @@ lines(c(x_tt_resolve_list[3], x_tt_resolve_list[5:7]), c(days_tt_resolve[3], day
 lines(mx_tt_resolve_list, mdays_tt_resolve, col='blue')
 points(mx_tt_resolve_list, mdays_tt_resolve, pch=c(24))
 
-ticks<-c(0,0.01, 0.1, 0.5,1,2,4,7,21, 28, 7*8, 7*16)
+ticks<-c(0,0.01, 0.1, 0.5,1,2,4,7,14,21, 28, 60, 120)
 
 axis(1, labels=c('2004', '2005', '2006', '2007', '2008', '2009'), at=x_tick_list)
-axis(2, labels=ticks, at=ticks)
+axis(2, las=1, labels=ticks, at=ticks)
 mtext("Days to Resolve Message", 2, line=3)
 #axis(2, labels=ticks, at=ticks)
 #for (i in 1:length(days_y_sd_list) ) {
@@ -322,9 +634,12 @@ for (i in 1:length(mdays_y_sd_list) ) {
             mdays_tt_resolve_high[i], mdays_tt_resolve_low[i], col='blue')
 }
 
-abline(h=21,col='grey90')
-abline(h=2,col='grey90')
-abline(h=0.5,col='grey80')
+abline(h=120,col='grey80', lty=2)
+abline(h=21,col='grey80', lty=2)
+abline(h=7,col='grey80', lty=2)
+abline(h=2,col='grey80', lty=2)
+abline(h=0.5,col='grey80', lty=2)
+abline(h=0.1,col='grey80', lty=2)
 
 legend(1, .05, 
         cex=0.7,
@@ -413,3 +728,65 @@ for (i in 1:length(t_sd) ) {
 #plot_rt_hist(t_89)
 par(mfrow=c(1,1))
 
+
+# system("./parse_rt_replies.py 3> rt_replies.csv")
+t_rep <- read.csv('rt_replies.csv', sep=',', header=TRUE)
+# Keep only replies with a non-zero diff.
+# NOTE(review): the earlier `complete == 1` subset was overwritten by this
+# line before it was ever used, so it has been dropped; if both filters are
+# wanted, subset t2_rep a second time instead of going back to t_rep.
+t2_rep <- t_rep[which(t_rep$diff != 0),]
+
+# Per-period peak of a skew-normal fit to log(hours) of |diff|.
+# Relies on sn.em(), dsn() and cp.to.dp() from the sn package.
+#   t       data frame with numeric columns `prev` (seconds) and `diff`
+#   from,to date range
+#   type    step passed to seq() on the dates (e.g. "month", "2 weeks")
+#   fmt     format() string for the label row
+# Periods without rows are skipped.  Returns a character matrix with one
+# COLUMN per kept period and rows xx (period start), yy (grid point where the
+# fitted density peaks), yy_sd_high / yy_sd_low (peak +/- the fitted 's.d.')
+# and m (label of the period start); NULL when every period is empty.
+mean_diff_time <- function (t, from, to, type, fmt="%b")
+{
+    # find 'type' range of days
+    dates <-seq(as.Date(from), as.Date(to), type)
+    months <- format(dates, fmt)
+    hbreaks<-unclass(as.POSIXct(dates))
+
+    xx<-NULL;
+    yy<-NULL;
+    yy_sd_high<-NULL;
+    yy_sd_low<-NULL;
+    m<-NULL;
+
+    x<-seq(-8,10,0.01)      # grid the fitted density is evaluated on
+    for ( i in seq_len(length(hbreaks)-1) )
+    {
+        # get range from t
+        t_sub <- t[which(t$prev > hbreaks[i] & t$prev <= hbreaks[i+1]),]
+        # count rows rather than t_sub$start: this function only relies on
+        # the `prev` and `diff` columns
+        if ( nrow(t_sub) == 0 )  { next }
+        d <- (abs(t_sub$diff)/(60*60))
+        d <- log(d)                                     # log(hours)
+        h<-sn.em(y=d)
+        if ( abs(h$cp['skewness']) > 0.95 )
+        {
+            print(h)    # report strongly skewed fits, but keep them
+        }
+
+        # peak of the fitted density; which.max() yields exactly one index
+        dens<-dsn(x, dp=cp.to.dp(h$cp))
+        y_peak <- x[which.max(dens)]
+        s <- unname(h$cp['s.d.'])
+
+        # append (not overwrite) so every row has one entry per kept period
+        xx<- c(xx, hbreaks[i])
+        yy<- c(yy, y_peak)
+        yy_sd_high<- c(yy_sd_high, y_peak + s)
+        yy_sd_low <- c(yy_sd_low, y_peak - s)
+        # label collected here so it stays aligned when periods are skipped
+        m<- c(m, months[i])
+    }
+    return (rbind(xx,yy,yy_sd_high, yy_sd_low, m))
+}
+
+par(mfrow=c(5,1))
+par(mai=c(.3,0.3,0.3,0.3))
+for ( s in c("2 weeks", "3 weeks", "month", "2 months"))
+#for ( s in c("month") )
+{
+    d<- mean_diff_time(t2_rep, "2004/1/1", "2010/2/28", s, "%b%y")
+    plot(d[1,], exp(as.numeric(d[2,]))/24, type='l', axes=F)
+    points(d[1,], exp(as.numeric(d[2,]))/24, pch=23) 
+    axis(1, labels=d[5,], at=d[1,])
+    axis(2)
+}
diff --git a/statistics/rt_data_opentickets.r b/statistics/rt_data_opentickets.r
new file mode 100644 (file)
index 0000000..998a936
--- /dev/null
@@ -0,0 +1,86 @@
+source("functions.r");
+
+# system("parse_rt_data.py 3 > rt_data.csv");
+t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
+t2 <- t[which(t$complete == 1),]
+
+
+# Count, for every period between from and to (step `type`), the tickets in
+# `t` that started after the period began and got their last reply no later
+# than the period's end.  Returns a character matrix with rows xx (period
+# start), yy (ticket count) and m (period start formatted with `fmt`).
+open_tickets <- function (t, from, to, type, fmt="%b")
+{
+    period_starts <- seq(as.Date(from), as.Date(to), type)
+    hbreaks <- unclass(as.POSIXct(period_starts))
+    periods <- seq(1,length(hbreaks)-1)
+
+    yy <- sapply(periods, function (i) {
+        length(which(t$start > hbreaks[i] & t$lastreply <= hbreaks[i+1]))
+    })
+    xx <- hbreaks[periods]
+
+    # every break except the last one opens a period
+    m <- head(format(period_starts, fmt), -1)
+    return (rbind(xx,yy,m))
+}
+
+ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'week', "%b%y")
+
+plot(ot[1,], ot[2,], axes=F)
+y<- ot[2,]
+s<-which(y!='0')
+y<-y[s]
+y<-as.numeric(y)
+plot(ot[1,s],y)
+axis(1, labels=ot[3,], at=ot[1,])
+axis(2)
+
+ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'day', "%b%y")
+x1<-as.numeric(ot[1,])
+y1<-as.numeric(ot[2,])
+
+# remove zero
+#s<-which(y1!='0')
+#y1<-y1[s]
+#x1<-x1[s]
+
+y1<-as.numeric(y1)
+
+lines(x1, y1, axes=F, pch='.')
+axis(1, labels=ot[3,], at=ot[1,])
+axis(2)
+#lines(ot[1,], ot[2,])
+#a<-smooth(as.numeric(y1))
+#x<-x1
+#y<-a
+
+a<-lowess(x1, y1, delta=(60*60*24), f=0.03)
+x<-a$x
+y<-a$y
+
+#y<-rollmedian(y1, 5)
+#x<-x1[1:length(y)]
+
+lines(x, y, col='red')
+lines(x, round(y), col='blue')
+#lines(x, ceiling(y), col='blue')
+
+abline_at_date('2005-01-01', 'grey40')
+abline_at_date('2006-01-01', 'grey40')
+abline_at_date('2007-01-01', 'grey40')
+abline_at_date('2008-01-01', 'grey40')
+abline_at_date('2009-01-01', 'grey40')
+abline_at_date('2010-01-01', 'grey40')
+
index 62b63ff..1f31e9f 100644 (file)
@@ -103,23 +103,39 @@ y_online_node_list <- c(330, 480,  500,    550,  575,  642)
 y<- rbind(support=s_list, monitor=m_list)
 barplot(y, space=0.5, width=1, ylim=c(0,2000), xlim=c(0,9),  
         col=c('grey35', 'grey85'),
-        legend=F, ylab="Messages with One or More Replies", xlab="Year")
+        legend=F, ylab="Tickets with One or More Replies", xlab="Year", axes=F)
 scale_by <- 1500 / 700
 lines(x_online_node_list, y_online_node_list*scale_by)
 points(x_online_node_list, y_online_node_list*scale_by, pch=c(22))
 ticks<-c(0, 100, 200, 300, 400, 500, 600, 700)
 
 axis(1, labels=c('2004', '2005', '2006', '2007', '2008', '2009'), at=x_online_node_list)
-axis(4, labels=ticks, at=ticks*scale_by)
+axis(2, las=1)
+axis(4, las=1, labels=ticks, at=ticks*scale_by)
 
 mtext("Online Node Count", 4, line=3)
 legend(6.5, 2000, 
         cex=0.7,
-        legend=c("Online Node Count", "MyOps Messages", "Support Messages"), 
+        legend=c("Online Node Count", "MyOps Tickets", "Support Tickets"), 
          fill=c(0, 'grey85', 'grey40'),
         lty=c(1,0,0), merge=T)
 end_image()
 
+# Tickets (support + monitor) per online node, one point per year.
+start_image('rt_tickets_per_machine.png')
+t_list <- s_list + m_list
+# space/width/legend are barplot() arguments and xlim/ylim mean nothing to
+# points(); they only produced "not a graphical parameter" warnings here
+plot(x_online_node_list, t_list / y_online_node_list, ylim=c(0,5), ylab="Tickets per Online Node", xlab="Year", xlim=c(0,9), axes=F, type='l')
+points(x_online_node_list, t_list / y_online_node_list, pch=c(23))
+axis(1, labels=c('2004', '2005', '2006', '2007', '2008', '2009'), at=x_online_node_list)
+axis(2)
+legend(5.5, 1, 
+        cex=0.7,
+        legend=c("Tickets per Online Node"), 
+        pch=c(23),
+        #fill=c('grey85'),
+        lty=c(1), merge=T)
+
+end_image()
+
 
 start_image("rt_monitor_seasonal.png")
 par(mfrow=c(3,1))
diff --git a/statistics/traffic_and_nodes.r b/statistics/traffic_and_nodes.r
new file mode 100644 (file)
index 0000000..89a6029
--- /dev/null
@@ -0,0 +1,225 @@
+source("functions.r");
+
+
+# Count, for every period between from and to (step `type`), the rows of `ns`
+# whose `date` falls inside the period and whose `status` is 'BOOT'.
+# Returns a character matrix with rows xx (period start), yy (row count) and
+# m (period start formatted with `fmt`).
+available_nodes <- function (ns, from, to, type, fmt="%b")
+{
+    period_starts <- seq(as.Date(from), as.Date(to), type)
+    hbreaks <- unclass(as.POSIXct(period_starts))
+    periods <- seq(1,length(hbreaks)-1)
+
+    yy <- sapply(periods, function (i) {
+        length(which(ns$date > hbreaks[i] & ns$date <= hbreaks[i+1] & ns$status == 'BOOT'))
+    })
+    xx <- hbreaks[periods]
+
+    # every break except the last one opens a period
+    m <- head(format(period_starts, fmt), -1)
+    return (rbind(xx,yy,m))
+}
+
+
+
+# Count, for every period between from and to (step `type`), the tickets in
+# `t` that touch the period: they span it entirely, start inside it, or get
+# their last reply inside it.  Returns a character matrix with rows xx
+# (period start), yy (ticket count) and m (period start formatted with `fmt`).
+open_tickets <- function (t, from, to, type, fmt="%b")
+{
+    period_starts <- seq(as.Date(from), as.Date(to), type)
+    hbreaks <- unclass(as.POSIXct(period_starts))
+    periods <- seq(1,length(hbreaks)-1)
+
+    yy <- sapply(periods, function (i) {
+        lo <- hbreaks[i]
+        hi <- hbreaks[i+1]
+        spans     <- t$start < lo & t$lastreply > hi
+        starts_in <- t$start > lo & t$start <= hi
+        ends_in   <- t$lastreply > lo & t$lastreply <= hi
+        length(which(spans | starts_in | ends_in))
+    })
+    xx <- hbreaks[periods]
+
+    # every break except the last one opens a period
+    m <- head(format(period_starts, fmt), -1)
+    return (rbind(xx,yy,m))
+}
+
+# For every distinct `timestamp` in `fb`, count the distinct hostnames seen
+# and the distinct hostnames whose state is 'BOOT'.  Prints the length of each
+# result row, then returns a matrix with rows x (timestamp), n (hostnames)
+# and o (hostnames in BOOT), one column per timestamp.
+online_nodes <- function (fb)
+{
+    stamps <- unique(fb$timestamp)
+    x<-NULL
+    n<-NULL
+    o<-NULL
+    for (k in seq(1,length(stamps)) )
+    {
+        ts <- stamps[k]
+        at_ts <- which(fb$timestamp == ts)
+        hosts <- fb$hostname[at_ts]
+        booted <- hosts[which(fb$state[at_ts]=='BOOT')]
+
+        x<-c(x,ts)
+        n<-c(n,length(unique(hosts)))
+        o<-c(o,length(unique(booted)))
+    }
+    for (len in c(length(x), length(n), length(o)) ) { print(len) }
+    return (rbind(x,n,o))
+}
+
+# lowess() with this script's defaults: delta of one day (in seconds) and a
+# smoother span f of 0.02.  Returns lowess()'s result unchanged.
+lowess_smooth <- function (x, y, delta=(60*60*24), f=0.02)
+{
+    return (lowess(x, y, f=f, delta=delta))
+}
+
+#####
+
+ns <- read.csv('node-status-jun09-feb10.csv', sep=',', header=TRUE)
+an <- available_nodes(ns, "2009-06-10", "2010-02-28", 'day')
+
+an_x<-an[1,][which(as.numeric(an[2,]) > 100)]
+an_y<-an[2,][which(as.numeric(an[2,]) > 100)]
+
+
+####
+#fb7 <- read.csv('findbad_raw_2007.csv', sep=',', header=TRUE)
+#fb8 <- read.csv('findbad_raw_2008.csv', sep=',', header=TRUE)
+#fb9 <- read.csv('findbad_raw_2009.csv', sep=',', header=TRUE)
+
+z7<- online_nodes(fb7)
+z8<- online_nodes(fb8)
+z9<- online_nodes(fb9)
+
+zx <- c(z7[1,],z8[1,],z9[1,])
+zy_reg <- c(z7[2,], z8[2,],z9[2,])
+zy_avail <- c(z7[3,], z8[3,],z9[3,])
+
+start_image("rt_aggregate_node_traffic.png")
+par(mfrow=c(2,1))
+par(mai=c(0,1,0.1,0.1))
+
+a_reg<-lowess_smooth(zx, zy_reg)
+plot(a_reg$x, a_reg$y, 
+     ylim=c(0,700), xlim=c(min(x1), max(x1)), type='l', pch='.', axes=F,
+     ylab="Online Node Count", xlab="")
+       
+sx <- zx[which(zy_avail > 330)]
+sy <- zy_avail[which(zy_avail > 330)]
+sx <- c(sx[1:2037],sx[2061:length(sx)])
+sy <- c(sy[1:2037],sy[2061:length(sy)])
+
+sx <- c(sx[1:1699],sx[1701:1707],sx[1709:length(sx)])
+sy <- c(sy[1:1699],sy[1701:1707],sy[1709:length(sy)])
+
+lines(sx, sy, col='grey80', pch='.')
+lines(an_x, an_y, col='grey80', pch='.')
+
+a_avail<-lowess_smooth(zx, zy_avail)
+lines(a_avail$x, a_avail$y, col='red', pch='.')
+
+a_avail_m3<-lowess_smooth(an_x, an_y)
+lines(a_avail_m3$x, a_avail_m3$y, col='red', pch='.')
+
+axis(2, las=1)
+
+x_online_node_list <- c(tstamp("2004-6-1"), tstamp("2005-6-1"), tstamp("2006-6-1"), tstamp("2007-11-1"))
+y_online_node_list <- c(330, 480,  500,  550)
+lines(x_online_node_list, y_online_node_list, col='grey80')
+
+#abline_at_date('2005-01-01', 'grey60')
+#abline_at_date('2006-01-01', 'grey60')
+#abline_at_date('2007-01-01', 'grey60')
+#abline_at_date('2008-01-01', 'grey60')
+#abline_at_date('2009-01-01', 'grey60')
+#abline_at_date('2010-01-01', 'grey60')
+
+tstamp_20041201 <-abline_at_date("2004-12-01", col='grey60', lty=2)
+tstamp_20050301 <-abline_at_date("2005-03-01", col='grey60', lty=2)
+tstamp_20050701 <-abline_at_date("2005-07-01", col='grey60', lty=2)
+tstamp_20051101 <-abline_at_date("2005-11-01", col='grey60', lty=2)
+tstamp_20051201 <-abline_at_date("2005-12-01", col='grey60', lty=2)
+tstamp_20070101 <-abline_at_date("2007-01-01", col='grey60', lty=2)
+tstamp_20070501 <-abline_at_date("2007-05-01", col='grey60', lty=2)
+tstamp_20080601 <-abline_at_date("2008-06-01", col='grey60', lty=2)
+tstamp_20080815 <-abline_at_date("2008-08-15", col='grey60', lty=2)
+tstamp_20090501 <-abline_at_date("2009-05-01", col='grey60', lty=2)
+tstamp_20100201 <-abline_at_date("2010-02-01", col='white', lty=2)
+
+
+text(x=c( tstamp_20041201+(tstamp_20050301-tstamp_20041201)/2,
+        tstamp_20050301+(tstamp_20050701-tstamp_20050301)/2,
+        tstamp_20050701+(tstamp_20051101-tstamp_20050701)/2,
+        tstamp_20051201+(tstamp_20070101-tstamp_20051201)/2,
+        tstamp_20070101+(tstamp_20070501-tstamp_20070101)/2,
+        tstamp_20080601+(tstamp_20080815-tstamp_20080601)/2,
+        tstamp_20090501+(tstamp_20100201-tstamp_20090501)/2 ),
+     y=c(700),
+     labels=c('3.0', '3.1', '3.1S', '3.2', '4.0', '4.2', '4.3')) 
+
+par(mai=c(1,1,0.1,0.1))
+# system("parse_rt_data.py 3 > rt_data.csv");
+
+t <- read.csv('rt_data_2004-2010.csv', sep=',', header=TRUE)
+t2 <- t[which(t$complete == 1),]
+ot <- open_tickets(t2, '2004/1/1', '2010/2/28', 'day', "%b")
+x1<-as.numeric(ot[1,])
+y1<-as.numeric(ot[2,])
+
+a_ot<-lowess_smooth(x1, y1)
+
+plot(x1, y1, col='grey80', type='l', axes=F, ylab="Open Tickets", xlab="Date") # , ylim=c(0,260))
+lines(a_ot$x, round(a_ot$y), col='red')
+
+axis(1, labels=ot[3,], at=ot[1,], cex.axis=0.7)
+axis(2, las=1)
+mtext("2004           2005           2006           2007           2008           2009", 1,2)
+
+abline_at_date('2005-01-01', 'grey60')
+abline_at_date('2006-01-01', 'grey60')
+abline_at_date('2007-01-01', 'grey60')
+abline_at_date('2008-01-01', 'grey60')
+abline_at_date('2009-01-01', 'grey60')
+abline_at_date('2010-01-01', 'grey60')
+abline(h=25, lty=2, col='grey80')
+abline(h=40, lty=2, col='grey80')
+end_image()
+
+
+m <- read.csv('rt_monitor_data.csv', sep=',', header=TRUE)
+m2 <- m[which(m$complete == 1),]
+otm <- open_tickets(m2, '2004/1/1', '2010/2/28', 'day', "%b")
+xm<-as.numeric(otm[1,])
+ym<-as.numeric(otm[2,])
+
+a<-lowess(xm, ym, delta=(60*60*24), f=0.02)
+x<-a$x
+y<-a$y
+lines(x, round(y), col='blue')
+
+#end_image()
+#t_july08 <-unclass(as.POSIXct("2008-07-01", origin="1970-01-01"))[1]
+#breaks <- unique(fb8$timestamp[which(fb8$timestamp < t_july08)])
+#fb8_boot <- fb8$timestamp[which(fb8$state=="BOOT" & fb8$timestamp < t_july08)]
+#h8<-hist(fb8_boot, breaks=breaks[which(!is.na(breaks) & breaks!=0)])
+#
+#breaks <- unique(as.numeric(as.character(fb9$timestamp)))
+#fb9_boot <- as.numeric(as.character(fb9$timestamp[which(fb9$state=="BOOT")]))
+#hist(fb9_boot, breaks=breaks[which(!is.na(breaks) & breaks >= 1230775020)])
+