S-PLUS : Copyright (c) 1988, 1996 MathSoft, Inc. S : Copyright AT&T. Version 3.4 Release 1 for Sun SPARC, SunOS 5.3 : 1996 Working data will be in .Data > > # Dauk, P.C. and Schwarz, C.J. (2001) > # Catch Estimation with Restricted Randomization in the Effort Survey > # Biometrics, **, ***-*** > > # This SPLUS file will compute the estimates from the Fraser River using > # in the paper. > > # To run in batch mode, use Splus BATCH example.splus example.out > options(echo=T) # echo commands and output > > # First define all the functions needed, then do the analysis > > > ######### ACCESS SURVEY - mean of ratio estimator ######### > access.mor <- function( sample, overflight) { + # Compute the mean of ratio estimator from access survey data + # arguments: sample - matrix of 3 columns x n rows. + # col1 = start time of the episode + # col2 = end time of the episode + # col3 = catch + # overflight - matrix of n rows x 2 columns - one row for each overflight + # col1 = time of overflight + # col2 = count of active episodes at that time + # + + # first find out which episodes were active during each overflight + # some fishing episodes go over day boundaries. As we are treating all five days on the + # river as "identical", we must also see if they were active on the next days "overflight" as well. + + # if the overflight is a vector, then force into a matrix + overflight <- matrix(overflight,ncol=2);+ + active1 <- outer(sample[,1], overflight[,1], "<") & + outer(sample[,2], overflight[,1], ">");+ active2 <- outer(sample[,1]-1440, overflight[,1], "<") & # check for next day activity + outer(sample[,2]-1440, overflight[,1], ">");+ active <- active1 | active2;+ + episodes <- nrow(sample);+ active.counts <- apply(active, c(2), sum);+ f <- sum(active.counts)/sum(overflight[,2]);+ + # first compute the individual estimates and the variance covariance matrix + R.hat.i <- sum(sample[,3]) / active.counts # individual R-hats;+ C.hat.i <- R.hat.i * overflight[,2] # individual Catch estimates;+ + diff <- sample[,3]- active %*% diag(R.hat.i,nrow=nrow(overflight));+ vcv <- t(diff) %*% diff / (episodes-1)*episodes/f*(1/f-1);+ se.i <- sqrt(diag(vcv));+ + + # now for the weighted averages. + # First a simple average + + w <- matrix(1/nrow(overflight), nrow=1, ncol=nrow(overflight));+ C.hat <- sum(w*C.hat.i);+ C.hat.se <- sqrt( w %*% vcv %*% t(w));+ + # now for the optimal weighting + w <- matrix(1, nrow=1, ncol=nrow(overflight));+ w <- w %*% solve(vcv)/ sum(solve(vcv));+ + C.hat.opt <- sum(w*C.hat.i);+ C.hat.opt.se <- sqrt( w %*% vcv %*% t(w));+ + + list(episodes=episodes, active.counts=active.counts, C.hat.i=C.hat.i, C.hat.i.se = se.i, + C.hat=C.hat, C.hat.se=C.hat.se , C.hat.opt=C.hat.opt, C.hat.opt.se=C.hat.opt.se, w.opt=w);+ + } > > > > ######### ACCESS SURVEY - ratio of means estimator ######### > > access.rom <- function( sample, overflight) { + # Compute the ratio of means estimator from access survey data + # arguments: sample - matrix of 3 columns x n rows. + # col1 = start time of the episode + # col2 = end time of the episode + # col3 = catch + # overflight - matrix of n rows x 2 columns - one row for each overflight + # col1 = time of overflight + # col2 = count of active episodes at that time + # + + # force the overflight to be a matrix + overflight <- matrix(overflight, ncol=2);+ + # first find out which episodes were active during each overflight + # some fishing episodes go over day boundaries. As we are treating all five days on the + # river as "identical", we must also see if they were active on the next days "overflight" as well. + + active1 <- outer(sample[,1], overflight[,1], "<") & + outer(sample[,2], overflight[,1], ">");+ active2 <- outer(sample[,1]-1440, overflight[,1], "<") & # check for next day activity + outer(sample[,2]-1440, overflight[,1], ">");+ active <- active1 | active2;+ + episodes <- nrow(sample);+ active.counts <- apply(active, c(2), sum);+ f <- sum(active.counts)/sum(overflight[,2]);+ + + R.hat <- sum(sample[,3]) / sum(active) ;+ C.hat <- R.hat * sum(overflight[,2]) ;+ + diff <- sample[,3]- R.hat*apply(active, c(1), sum);+ se <- sum(diff*diff)/(episodes-1)*episodes/f*(1/f-1);+ se <- sqrt(se);+ + list(episodes=episodes, active.counts=active.counts, C.hat=C.hat, C.hat.se = se);+ + } > > > > ######### ROVING SURVEY - mean of ratio estimator ######### > > roving.mor <- function( sample, overflight) { + # Compute the mean of ratio estimator from ROVING survey data + # arguments: sample - matrix of 4 columns x n rows. + # col1 = start time of the episode + # col2 = time of interview + # col3 = end time of the episode + # col4 = catch + # overflight - matrix of n rows x 2 columns - one row for each overflight + # col1 = time of overflight + # col2 = count of active episodes at that time + # + + # force the overflight to be a matrix + overflight <- matrix(overflight, ncol=2);+ + # first find out which episodes were active during each overflight + # some fishing episodes go over day boundaries. As we are treating all five days on the + # river as "identical", we must also see if they were active on the next days "overflight" as well. + + active1 <- outer(sample[,1], overflight[,1], "<") & + outer(sample[,3], overflight[,1], ">");+ active2 <- outer(sample[,1]-1440, overflight[,1], "<") & # check for next day activity + outer(sample[,3]-1440, overflight[,1], ">");+ active <- active1 | active2;+ + catch <- sample[,4];+ + episodes <- nrow(sample);+ Lstar <- sample[,3]-sample[,1]+1 # length of entire episode;+ L <- sample[,2]-sample[,1]+1 # time from start to time of interview;+ TT <- 24*60 # assuming that the fishing day is 1 day = 24 hours long;+ pi <- Lstar/ (24*60) # prob of selection of episode;+ + active.counts <- apply(active, c(2), sum);+ f <- sum(active.counts)/sum(overflight[,2]);+ + N.hat <- TT* sum(1/Lstar);+ + # estimate the individual estimates from each overflight survey + + R.hat.i <- sum( catch/L) / ((1/Lstar) %*% active);+ C.hat.i <- R.hat.i * overflight[,2] ;+ + diff <- (catch*Lstar/L) %*% matrix(1,nrow=1,ncol=nrow(overflight)) - (active %*% diag(R.hat.i,nrow=nrow(overflight)));+ vcv <- t(diff) %*% diag((1-pi)/pi/pi) %*% diff ;+ vcv <- vcv + diag(N.hat/episodes*TT*sum(catch/L), nrow=nrow(overflight));+ se.i <- sqrt(diag(vcv));+ + + # now for the weighted averages. + # First a simple average + + w <- matrix(1/nrow(overflight), nrow=1, ncol=nrow(overflight));+ C.hat <- sum(w*C.hat.i);+ C.hat.se <- sqrt( w %*% vcv %*% t(w));+ + # now for the optimal weighting + w <- matrix(1, nrow=1, ncol=nrow(overflight));+ w <- w %*% solve(vcv)/ sum(solve(vcv));+ + C.hat.opt <- sum(w*C.hat.i);+ C.hat.opt.se <- sqrt( w %*% vcv %*% t(w));+ + + list(episodes=episodes, active.counts=active.counts, C.hat.i=C.hat.i, C.hat.i.se = se.i, + C.hat=C.hat, C.hat.se=C.hat.se , C.hat.opt=C.hat.opt, C.hat.opt.se=C.hat.opt.se, w.opt=w);+ + } > > > > > ######### ROVING SURVEY - ratio of means estimator ######### > roving.rom <- function( sample, overflight) { + # Compute the ratio of means estimator from ROVING survey data + # arguments: sample - matrix of 4 columns x n rows. + # col1 = start time of the episode + # col2 = time of interview + # col3 = end time of the episode + # col4 = catch + # overflight - matrix of n rows x 2 columns - one row for each overflight + # col1 = time of overflight + # col2 = count of active episodes at that time + # + + # force the overflight to be a matrix + overflight <- matrix(overflight, ncol=2);+ + # first find out which episodes were active during each overflight + # some fishing episodes go over day boundaries. As we are treating all five days on the + # river as "identical", we must also see if they were active on the next days "overflight" as well. + + active1 <- outer(sample[,1], overflight[,1], "<") & + outer(sample[,3], overflight[,1], ">");+ active2 <- outer(sample[,1]-1440, overflight[,1], "<") & # check for next day activity + outer(sample[,3]-1440, overflight[,1], ">");+ active <- active1 | active2;+ + catch <- sample[,4];+ + episodes <- nrow(sample);+ Lstar <- sample[,3]-sample[,1]+1 # length of entire episode;+ L <- sample[,2]-sample[,1]+1 # time from start to time of interview;+ TT <- 24*60 # assuming that the fishing day is 1 day = 24 hours long;+ pi <- Lstar/ (24*60) # prob of selection of episode;+ + active.counts <- apply(active, c(2), sum);+ f <- sum(active.counts)/sum(overflight[,2]);+ + N.hat <- TT* sum(1/Lstar);+ + + R.hat <- sum( catch/L) / sum((1/Lstar) %*% active );+ C.hat <- R.hat * sum(overflight[,2]) ;+ + diff <- catch*Lstar/L - R.hat*apply(active,c(1),sum);+ + var <- N.hat/episodes*TT*sum(catch/L) + sum( (1-pi)/pi*diff*diff/pi);+ se <- sqrt(var);+ + list(episodes=episodes, active.counts=active.counts, C.hat=C.hat, C.hat.se = se);+ + } > > > ######################################################################################################## > # --------------- PART I ----------------------------------- > # The access survey > # > > # The access sample is a simple random sample from an access point. > # The recorded values are start time (minutes after midnight), > # end time (minutes after midnight), > # catch (number of fish). > # Note that some episodes spanned day bounaries. For example, episode 3, went to 12:30 in > # the next day. As we are treating all days as having overflights at the same times, > # you must also check if an episode was active the next day at the overflight times. > > access.sample <- matrix( + c( + # start.time end.time fish.caught , + 540, 660, 3, + 600, 750, 22, + 1260, 1475, 1 , + 1020, 1480, 8 , + 1200, 1485, 19 , + 1260, 1505, 48 , + 1020, 1770, 9 , + 1200, 1780, 15 , + 990, 1110, 27 , + 1230, 2021, 50, + 480, 600, 0, + 330, 567, 40, + 390, 580, 60, + 1320, 1988, 39, + 1350, 2040, 80, + 440, 540, 8, + 690, 1595, 60, + 420, 617, 24, + 330, 580, 55, + 300, 830, 30, + 1110, 1960, 5, + 420, 470, 7, + 1260, 1935, 12, + 930, 1879, 27, + 1290, 1895, 65, + 1200, 1996, 10, + 1140, 2024, 17, + 300, 597, 80, + 780, 900, 7, + 900, 1010, 8, + 1010, 1146, 13, + 455, 1210, 10, + 1200, 1230, 14, + 990, 1260, 30, + 1146, 1423, 14, + 1320, 1830, 72, + 1080, 1875, 7, + 1380, 1890, 2, + 1200, 1901, 30, + 1200, 1890, 12, + 1080, 1860, 27, + 1320, 1950, 12, + 1140, 1980, 14, + 480, 564, 15, + 660, 870, 17, + 870, 1770, 14, + 1260, 1815, 5, + 1230, 1860, 5, + 1080, 1200, 24), ncol=3, byrow=T , dimnames=list(NULL, c("start","end","catch"))) > > > # Overflights . Values recorded are > # time (minutes after midnight), > # total angling parties active at that time > > access.overflight <- matrix( + # time count + c( 515, 235, + 744, 175), ncol=2, byrow=T, dimnames=list(NULL, c("time","count"))) > > > # list the data > > access.sample start end catch [1,] 540 660 3 [2,] 600 750 22 [3,] 1260 1475 1 [4,] 1020 1480 8 [5,] 1200 1485 19 [6,] 1260 1505 48 [7,] 1020 1770 9 [8,] 1200 1780 15 [9,] 990 1110 27 [10,] 1230 2021 50 [11,] 480 600 0 [12,] 330 567 40 [13,] 390 580 60 [14,] 1320 1988 39 [15,] 1350 2040 80 [16,] 440 540 8 [17,] 690 1595 60 [18,] 420 617 24 [19,] 330 580 55 [20,] 300 830 30 [21,] 1110 1960 5 [22,] 420 470 7 [23,] 1260 1935 12 [24,] 930 1879 27 [25,] 1290 1895 65 [26,] 1200 1996 10 [27,] 1140 2024 17 [28,] 300 597 80 [29,] 780 900 7 [30,] 900 1010 8 [31,] 1010 1146 13 [32,] 455 1210 10 [33,] 1200 1230 14 [34,] 990 1260 30 [35,] 1146 1423 14 [36,] 1320 1830 72 [37,] 1080 1875 7 [38,] 1380 1890 2 [39,] 1200 1901 30 [40,] 1200 1890 12 [41,] 1080 1860 27 [42,] 1320 1950 12 [43,] 1140 1980 14 [44,] 480 564 15 [45,] 660 870 17 [46,] 870 1770 14 [47,] 1260 1815 5 [48,] 1230 1860 5 [49,] 1080 1200 24 > access.overflight time count [1,] 515 235 [2,] 744 175 > > > # compute the ratio-of-means estimators for the access survey > > access.rom (access.sample, access.overflight) $episodes: [1] 49 $active.counts: [1] 17 5 $C.hat: [1] 21860.45 $C.hat.se: [1] 4184.716 > > > # compute the means-of-ratio estimators for the access survey > > access.mor (access.sample, access.overflight) $episodes: [1] 49 $active.counts: [1] 17 5 $C.hat.i: [1] 16215 41055 $C.hat.i.se: [1] 4380.719 9346.011 $C.hat: [1] 28635 $C.hat.se: [,1] [1,] 5369.223 $C.hat.opt: [1] 19976.18 $C.hat.opt.se: [,1] [1,] 4116.953 $w.opt: [,1] [,2] [1,] 0.8485839 0.1514161 > > > > > > > # --------------- PART II ----------------------------------- > # The roving survey > # > > # The roving sample moves though the fishing site inteviewing anglers > # The recorded values are start time (minutes after midnight), > # interview time (minutes after midnight) > # end time (minutes after midnight), > # catch (number of fish). > # Note that some episodes spanned day bounaries. For example, episode 3, went to 12:30 in > # the next day. As we are treating all days as having overflights at the same times, > # you must also check if an episode was active the next day at the overflight times. > > # Note the episode lengths are computed as (end-start+1) > > roving.sample <- matrix( + c( + # start.time interview.time end.time fish.caught , + 600, 702, 780, 20, + 480, 726, 1020, 28, + 1080, 2053, 2220, 15, + 480, 617, 840, 2, + 420, 654, 660, 17 , + 570, 690, 700, 58 , + 360, 779, 960, 38, + 1200, 1963, 2130, 74 , + 1020, 1973, 1974, 25, + 630, 677, 780, 4, + 630, 685, 780, 2 , + 780, 868, 869, 4 , + 1110, 1160, 1740, 14 , + 810, 1490, 2210, 40 , + 1320, 1510, 2160, 3 , + 1320, 1872, 2160, 5 , + 1320, 1956, 2160, 14 , + 1320, 2135, 2175, 35 , + 480, 638, 639, 1, + 240, 640, 720, 70 , + 1080, 2017, 2018, 14 , + 480, 621, 720, 14 , + 1080, 2063, 2065, 20 , + 600, 777, 840, 31 , + 780, 907, 960, 14 , + 420, 482, 1320, 5, + 480, 517, 1200, 3 , + 720, 838, 1439, 0 , + 360, 879, 900, 1 , + 1080, 1105, 1290, 7 , + 1290, 1420, 1530, 5 , + 630, 834, 960, 12 , + 525, 609, 690, 2 , + 439, 940, 960, 20 , + 600, 890, 1080, 6 , + 570, 685, 840, 7 , + 350, 385, 1320, 3 , + 360, 400, 720, 30 , + 375, 405, 1320, 3 , + 330, 412, 480, 27 , + 470, 500, 1320, 2 , + 563, 623, 750, 4 , + 630, 660, 1320, 1 , + 600, 670, 1200, 5 , + 600, 840, 1200, 0 , + 390, 428, 1320, 1 , + 420, 454, 1320, 2 , + 360, 609, 1320, 10 , + 360, 618, 1320, 14 , + 595, 627, 1320, 0 , + 570, 631, 1320, 6 , + 360, 638, 1320, 10 , + 570, 648, 840, 15 , + 620, 652, 1320, 7 , + 450, 669, 720, 25 , + 600, 780, 1320, 4 , + 750, 795, 1320, 1 , + 885, 915, 1050, 5 , + 1050, 1165, 1320, 1 , + 570, 1200, 1320, 4 ), ncol=4, byrow=T , dimnames=list(NULL, c("start","interview", "end","catch"))) > > > # Overflights . Values recorded are > # time (minutes after midnight) > # total angling parties active at that time > # > roving.overflight <- matrix( + # time count, + c( 515, 235, + 744, 175), ncol=2, byrow=T, dimnames=list(NULL, c("time","count"))) > > > # list the data > > roving.sample start interview end catch [1,] 600 702 780 20 [2,] 480 726 1020 28 [3,] 1080 2053 2220 15 [4,] 480 617 840 2 [5,] 420 654 660 17 [6,] 570 690 700 58 [7,] 360 779 960 38 [8,] 1200 1963 2130 74 [9,] 1020 1973 1974 25 [10,] 630 677 780 4 [11,] 630 685 780 2 [12,] 780 868 869 4 [13,] 1110 1160 1740 14 [14,] 810 1490 2210 40 [15,] 1320 1510 2160 3 [16,] 1320 1872 2160 5 [17,] 1320 1956 2160 14 [18,] 1320 2135 2175 35 [19,] 480 638 639 1 [20,] 240 640 720 70 [21,] 1080 2017 2018 14 [22,] 480 621 720 14 [23,] 1080 2063 2065 20 [24,] 600 777 840 31 [25,] 780 907 960 14 [26,] 420 482 1320 5 [27,] 480 517 1200 3 [28,] 720 838 1439 0 [29,] 360 879 900 1 [30,] 1080 1105 1290 7 [31,] 1290 1420 1530 5 [32,] 630 834 960 12 [33,] 525 609 690 2 [34,] 439 940 960 20 [35,] 600 890 1080 6 [36,] 570 685 840 7 [37,] 350 385 1320 3 [38,] 360 400 720 30 [39,] 375 405 1320 3 [40,] 330 412 480 27 [41,] 470 500 1320 2 [42,] 563 623 750 4 [43,] 630 660 1320 1 [44,] 600 670 1200 5 [45,] 600 840 1200 0 [46,] 390 428 1320 1 [47,] 420 454 1320 2 [48,] 360 609 1320 10 [49,] 360 618 1320 14 [50,] 595 627 1320 0 [51,] 570 631 1320 6 [52,] 360 638 1320 10 [53,] 570 648 840 15 [54,] 620 652 1320 7 [55,] 450 669 720 25 [56,] 600 780 1320 4 [57,] 750 795 1320 1 [58,] 885 915 1050 5 [59,] 1050 1165 1320 1 [60,] 570 1200 1320 4 > roving.overflight time count [1,] 515 235 [2,] 744 175 > > > # compute the ratio-of-means estimators for the roving survey > > roving.rom (roving.sample, roving.overflight) $episodes: [1] 60 $active.counts: [1] 31 35 $C.hat: [1] 17108.89 $C.hat.se: [1] 1465.263 > > > # compute the means-of-ratio estimators for the roving survey > > roving.mor (roving.sample, roving.overflight) $episodes: [1] 60 $active.counts: [1] 31 35 $C.hat.i: [,1] [,2] [1,] 23503.41 12530.79 $C.hat.i.se: [1] 1736.678 1765.027 $C.hat: [1] 18017.1 $C.hat.se: [,1] [1,] 1448.047 $C.hat.opt: [1] 18157.63 $C.hat.opt.se: [,1] [1,] 1447.827 $w.opt: [,1] [,2] [1,] 0.5128075 0.4871925 > > >