count bout length in sequences of different activities - posixct

I have a very big dataset (65400 rows!) for which I need to calculate bout lengths. When we were collecting the data, we recorded what our focal animal was doing in each minute and second during a 5-minute session. My sample data is as follows:
structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "02/04/2015", class = "factor"), minute = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), sec = 1:11, activity = structure(c(2L,
2L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L), .Label = c("N", "S",
"U"), class = "factor"), day_time = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "14:45", class = "factor")), .Names = c("date",
"minute", "sec", "activity", "day_time"), class = "data.frame", row.names = c(NA,
-11L))
df
date minute sec activity day_time
1 02/04/2015 1 1 S 14:45
2 02/04/2015 1 2 S 14:45
3 02/04/2015 1 3 S 14:45
4 02/04/2015 1 4 N 14:45
5 02/04/2015 1 5 U 14:45
6 02/04/2015 1 6 U 14:45
7 02/04/2015 1 7 U 14:45
8 02/04/2015 1 8 S 14:45
9 02/04/2015 1 9 S 14:45
10 02/04/2015 1 10 S 14:45
11 02/04/2015 1 11 S 14:45
What we need to calculate is the length of each bout (in seconds), keeping the 5-minute sessions separate. In this case our desired output will be:
structure(list(date = structure(c(1L, 1L, 1L, 1L), .Label = "02/04/2015", class = "factor"),
minute = c(1L, 1L, 1L, 1L), activity = structure(c(2L, 1L,
3L, 2L), .Label = c("N", "S", "U"), class = "factor"), day_time = structure(c(1L,
1L, 1L, 1L), .Label = "14:45", class = "factor"), bout_length = c(3L,
1L, 3L, 4L)), .Names = c("date", "minute", "activity", "day_time",
"bout_length"), class = "data.frame", row.names = c(NA, -4L))
desired output
date minute activity day_time bout_length
1 02/04/2015 1 S 14:45 3
2 02/04/2015 1 N 14:45 1
3 02/04/2015 1 U 14:45 3
4 02/04/2015 1 S 14:45 4
I have tried rle() without success, since I have to account for the different minutes and sessions. Thank you for helping out.

I finally found my way around it. It would be nice if there were a shorter way to do it:
# Compute the length of every bout (run of identical consecutive activities),
# keeping sessions separate.
# NOTE(review): the sample data shown earlier has a lower-case `date` column and
# no `session` column; this script presumably targets the full data set, where
# `session` and `Date` exist -- confirm the column names before running.

# Convert every column to character so that a sentinel row can be appended.
bouts1 <- as.data.frame(lapply(df, as.character), stringsAsFactors = FALSE)
# Append an all-"empty" row after each session so that no run of equal
# activities can span two sessions, then drop the trailing sentinel row.
bouts1 <- head(do.call(rbind, by(bouts1, df$session, rbind, "empty")), -1)
rownames(bouts1) <- seq_len(nrow(bouts1))  # plain 1..n row names
n_rows <- nrow(bouts1)
# TRUE at position i when row i + 1 holds a different activity than row i.
diffs <- bouts1$activity[-1L] != bouts1$activity[-n_rows]
# Row index of the last observation of each bout; the final row always ends one.
idx <- c(which(diffs), n_rows)
bout.len <- diff(c(0, idx))
trial <- which(diffs)  # rows after which the activity changes
# One row per bout, taken from the working frame itself rather than from a
# hard-coded row number of a separate object, so it works for any data size.
new.bouts <- bouts1[idx, ]
new.bouts$bout.len <- bout.len
# Remove the sentinel rows (each one forms a one-row "bout" of its own).
new.bouts <- new.bouts[new.bouts$Date != "empty", ]

Related

Unexplainable error message in piecewiseSEM

I would like to run a path analysis using the R package piecewiseSEM.
I used a very simple model structure based on the example in the piecewiseSEM description.
Unfortunately, once I execute the "as.psem" function I get the error "Error in [.data.frame(x$data, , vars) : undefined columns selected".
I checked my data and the model structure, and everything appears to be fine. Does anybody know why I still get the error?
I posted my simple code and the data set below.
Thanks a ton
Mike
#CODE------------------------------------------------------------
# Treat the three design variables as categorical (factor) columns.
data2[c("Year", "Drought", "Stratum")] <-
  lapply(data2[c("Year", "Drought", "Stratum")], as.factor)
CODE:
# 1x:
# NOTE(review): lmer()/glmer() are presumably from lme4 and as.psem()/psem()/
# coefs() from piecewiseSEM; the library() calls are not shown in this snippet.
# Linear mixed model: N_pc explained by the Stratum:Drought and Year:Drought
# interaction terms, with a random intercept per Tree.ID.
mod1x<-lmer(N_pc~Stratum:Drought+Year:Drought+(1|Tree.ID),data2)
# Binomial mixed model (logit link): the two-column response
# cbind(branch_suck_Yes, branch_suck_No) is explained by N_pc, with a random
# intercept per Tree.ID.
mod11a<-glmer(cbind(branch_suck_Yes,branch_suck_No)~N_pc+(1|Tree.ID),
data=data2, family = binomial(link = "logit"))
# Collect both fitted models in a plain list.
modlist = list(
mod1x, mod11a)
# Attempt 1: convert the list to a psem object; this is the call that fails.
model<-as.psem(modlist)#Error: Error in `[.data.frame`(x$data, , vars) : undefined columns selected
# Attempt 2: pass the list together with the data frame to psem().
model<-psem(modlist, data2)
# Coefficient table requested without standardization and without intercepts.
coefs(modlist, data2, standardize = "none", intercept = FALSE)
# Attempt 3: pass the two models to psem() directly.
model<-psem(mod1x, mod11a)```
#DATA---------------------------------------------------------------
structure(list(N_pc = c(2.39, 2.81, 2.48, 1.83, 1.91, 1.96, 2.32,
2.54, 2.19, 1.97, 2.29, 1.64, 2.03, 1.68, 2.145, 2.08, 1.99,
1.99, 2.83, 2.83, 2.91, 2.61, 2.73, 2.54, 2.87, 1.91, 2.84, 2.74,
2.87, 2.6, 2.12, 2.64, 2.46, 1.83, 2.06, 2.77, 2.41, 2.74, 2.83,
2.51, 2.79, 2.66, 2.44, 2.26, 2.85, 2.39, 2.52, 2.13, 2.63, 2,
2.43, 2.36, 2.98, 2.28, 2.12, 2.2, 2.54, 1.28, 2.57, 2.17, 2.32,
2.41, 3.11, 2.591, 2.77, 2.53, 2.67, 2.45, 2.5, 2.52, 2.9, 3.03,
2.83, 2.52, 2.57, 2.62, 2.82, 2.62, 2.98, 3.01, 2.33, 2.11, 2.68,
2.74, 2.53, 2.43), Stratum = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("shade", "sun"), class = "factor"), Drought = structure(c(1L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
Year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("1", "2"), class = "factor"), Tree.ID = structure(c(2L,
15L, 19L, 21L, 22L, 23L, 25L, 28L, 29L, 29L, 30L, 31L, 32L,
33L, 34L, 34L, 35L, 36L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L,
5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 10L, 10L, 11L, 11L, 12L,
13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L,
19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 24L, 24L,
25L, 25L, 26L, 26L, 27L, 27L, 28L, 28L, 29L, 29L, 32L, 32L,
33L, 33L, 34L, 34L, 35L, 35L, 36L, 36L, 37L, 37L), .Label = c("102_6",
"102_7", "102_8", "113_2", "113_4", "113_5", "114_7", "114_8",
"114_9", "116_6", "116_7", "116_9", "122_3", "122_5", "132_3",
"132_4", "132_5", "242_2", "242_4", "242_5", "243_1", "243_2",
"243_4", "245_1", "245_2", "245_5", "251_10", "251_8", "251_9",
"253_7", "253_8", "254_6", "254_7", "254_8", "267_10", "267_6",
"267_8"), class = "factor"), branch_suck_Yes = c(2L, 3L,
1L, 6L, 6L, 5L, 4L, 8L, 2L, 2L, 2L, 48L, 3L, 22L, 14L, 2L,
1L, 1L, 27L, 25L, 16L, 13L, 31L, 18L, 31L, 2L, 25L, 16L,
36L, 21L, 8L, 23L, 13L, 11L, 7L, 35L, 7L, 17L, 4L, 40L, 48L,
17L, 16L, 10L, 34L, 6L, 46L, 7L, 26L, 12L, 24L, 27L, 31L,
25L, 34L, 21L, 44L, 23L, 40L, 30L, 25L, 18L, 35L, 10L, 6L,
10L, 29L, 5L, 24L, 16L, 19L, 11L, 21L, 10L, 18L, 18L, 42L,
33L, 16L, 31L, 16L, 38L, 37L, 28L, 9L, 35L), branch_suck_No = c(48L,
47L, 49L, 44L, 44L, 45L, 46L, 42L, 48L, 48L, 48L, 2L, 47L,
28L, 36L, 48L, 49L, 49L, 23L, 25L, 34L, 37L, 19L, 32L, 19L,
48L, 25L, 34L, 14L, 29L, 42L, 27L, 37L, 39L, 43L, 15L, 43L,
33L, 46L, 10L, 2L, 33L, 34L, 40L, 16L, 44L, 4L, 43L, 24L,
38L, 26L, 23L, 19L, 25L, 16L, 29L, 6L, 27L, 10L, 20L, 25L,
32L, 15L, 40L, 44L, 40L, 21L, 45L, 26L, 34L, 31L, 39L, 29L,
40L, 32L, 32L, 8L, 17L, 34L, 19L, 34L, 12L, 13L, 22L, 41L,
15L)), row.names = c(3L, 43L, 51L, 55L, 57L, 59L, 63L, 76L,
77L, 78L, 80L, 81L, 86L, 87L, 89L, 90L, 92L, 93L, 97L, 98L, 99L,
100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L,
111L, 112L, 113L, 115L, 116L, 117L, 118L, 119L, 121L, 122L, 123L,
124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L,
135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L,
146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L,
157L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L), na.action = structure(c(`1` = 1L,
`2` = 2L, `4` = 4L, `5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L, `9` = 9L,
`10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L, `15` = 15L,
`16` = 16L, `17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L, `21` = 21L,
`22` = 22L, `23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L, `27` = 27L,
`28` = 28L, `29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`40` = 40L, `41` = 41L, `42` = 42L, `44` = 44L, `45` = 45L, `46` = 46L,
`47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L, `52` = 52L, `53` = 53L,
`54` = 54L, `56` = 56L, `58` = 58L, `60` = 60L, `61` = 61L, `62` = 62L,
`64` = 64L, `65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L, `69` = 69L,
`70` = 70L, `71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L,
`79` = 79L, `82` = 82L, `83` = 83L, `84` = 84L, `85` = 85L, `88` = 88L,
`91` = 91L, `94` = 94L, `95` = 95L, `96` = 96L, `114` = 114L,
`120` = 120L), class = "omit"), class = "data.frame")

How to superimpose broken line plot over bar plot with uneven number data points along x axis (date)?

I am trying to plot these on one graph to show the change in length at three different sites over time as temperature increases.
I shortened the temperature data frame to match the length data to create this plot: Length/Temperature vs Time
with this set of data:
structure(list(month = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L), .Label = c("Jan",
"Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
"Nov", "Dec"), class = c("ordered", "factor")), site = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L), .Label = c("port", "bluff", "palme"), class = "factor"),
mean = c(15.4066666666667, 14.7866666666667, 0, 38.3066666666667,
40.31, 52.06, 93.6266666666667, 84.6233333333333, 91.1083333333333,
51.6, 78.2766666666667, 96.9466666666667, 15.9633333333333,
59.1333333333333, 42.3233333333333, 10.3033333333333, 0.1,
0.1), sd = c(6.10053323465469, 5.62921770355553, 0, 14.1369211722314,
18.7262925984499, 18.0693224497518, 34.0724297822208, 43.6832697662534,
45.0430332232857, 26.4636380327995, 36.7493624934932, 44.660549074974,
6.0898719387098, 18.5524853495072, 16.3840478206244, 5.80234692905013,
0, 0), n = c(30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), se = c(1.11379988847678,
1.02774983911493, 0, 2.58103687323451, 3.41893762487101,
3.29899183485452, 6.22074612691106, 7.9754374121868, 8.22369511828274,
4.83157716807198, 6.70948493720683, 8.15386338630991, 1.11185341104969,
3.38720490790306, 2.99130419153311, 1.05935876650386, 0,
0), tmp = c(18.893625, 18.893625, 18.893625, 19.7045148809524,
19.7045148809524, 19.7045148809524, 20.6929475806452, 20.6929475806452,
20.6929475806452, 23.8039069444444, 23.8039069444444, 23.8039069444444,
25.7307553763441, 25.7307553763441, 25.7307553763441, 28.3008958333333,
28.3008958333333, 28.3008958333333)), class = "data.frame", row.names = c(7L,
8L, 9L, 4L, 5L, 6L, 13L, 14L, 15L, 1L, 2L, 3L, 16L, 17L, 18L,
10L, 11L, 12L))
Plot:
library(ggplot2)
# Dodged bars of mean length per month and site, with temperature drawn as a
# single line. Temperature is multiplied by 3 to share the primary y scale and
# the secondary axis divides by 3 to label it in original units.
ggplot(df3, aes(x = month, y = mean)) +
  geom_col(aes(fill = site), colour = "black", position = "dodge") +
  geom_line(aes(y = tmp * 3, group = 1), size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 3, name = "Temperature"))
But I would like to produce a plot with the complete temperature data set, which is much longer than the length data set; that is, I would like to superimpose the two plots below:
Length vs Time
structure(list(month = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L), .Label = c("Jan",
"Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
"Nov", "Dec"), class = c("ordered", "factor")), site = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L), .Label = c("port", "bluff", "palme"), class = "factor"),
mean = c(15.4066666666667, 14.7866666666667, 0, 38.3066666666667,
40.31, 52.06, 93.6266666666667, 84.6233333333333, 91.1083333333333,
51.6, 78.2766666666667, 96.9466666666667, 15.9633333333333,
59.1333333333333, 42.3233333333333, 10.3033333333333, 0.1,
0.1), sd = c(6.10053323465469, 5.62921770355553, 0, 14.1369211722314,
18.7262925984499, 18.0693224497518, 34.0724297822208, 43.6832697662534,
45.0430332232857, 26.4636380327995, 36.7493624934932, 44.660549074974,
6.0898719387098, 18.5524853495072, 16.3840478206244, 5.80234692905013,
0, 0), n = c(30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), se = c(1.11379988847678,
1.02774983911493, 0, 2.58103687323451, 3.41893762487101,
3.29899183485452, 6.22074612691106, 7.9754374121868, 8.22369511828274,
4.83157716807198, 6.70948493720683, 8.15386338630991, 1.11185341104969,
3.38720490790306, 2.99130419153311, 1.05935876650386, 0,
0)), row.names = c(7L, 8L, 9L, 4L, 5L, 6L, 13L, 14L, 15L,
1L, 2L, 3L, 16L, 17L, 18L, 10L, 11L, 12L), class = "data.frame")
Temperature vs Time
structure(list(month = structure(c(17927, 17928, 17929, 17930,
17931, 17932, 17933, 17934, 17935, 17936, 17937, 17938, 17939,
17940, 17941, 17942, 17943, 17944, 17945, 17946, 17947, 17948,
17949, 17950, 17951, 17952, 17953, 17954, 17955, 17956, 17957,
17958, 17959, 17960, 17961, 17962, 17963, 17964, 17965, 17966,
17967, 17968, 17969, 17970, 17971, 17972, 17973, 17974, 17975,
17976, 17977, 17978, 17979, 17980, 17981, 17982, 17983, 17984,
17985, 17986, 17987, 17988, 17989, 17990, 17991, 17992, 17993,
17994, 17995, 17996, 17997, 17998, 17999, 18000, 18001, 18002,
18003, 18004, 18005, 18006, 18007, 18008, 18009, 18010, 18011,
18012, 18013, 18014, 18015, 18016, 18017, 18018, 18019, 18020,
18021, 18022, 18023, 18024, 18025, 18026, 18027, 18028, 18029,
18030, 18031, 18032, 18033, 18034, 18035, 18036, 18037, 18038,
18039, 18040, 18041, 18042, 18043, 18044, 18045, 18046, 18047,
18048, 18049, 18050, 18051, 18052, 18053, 18054, 18055, 18056,
18057, 18058, 18059, 18060, 18061, 18062, 18063, 18064, 18065,
18066, 18067, 18068, 18069, 18070, 18071, 18072, 18073, 18074,
18075, 18076, 18077, 18078, 18079, 18080, 18081, 18082, 18083,
18084), class = "Date"), tmp = c(18.893625, 18.5962083333333,
18.6715416666667, 18.9369583333333, 19.3453333333333, 19.274,
19.5038333333333, 19.6745833333333, 19.6940833333333, 19.8091666666667,
19.706125, 19.40075, 19.46825, 19.6822083333333, 19.8845416666667,
19.8647916666667, 20.1585, 20.1541666666667, 19.6780416666667,
19.8689583333333, 20.44425, 20.551375, 20.29725, 19.876625, 19.6822083333333,
19.428375, 19.7496666666667, 19.9916666666667, 20.3329583333333,
20.6546666666667, 20.328875, 20.5115833333333, 20.2219583333333,
20.2930833333333, 20.4754583333333, 20.0629166666667, 19.8211666666667,
19.7100416666667, 19.4284583333333, 19.6745, 20.1306666666667,
20.301375, 20.01925, 19.7935416666667, 19.828875, 20.015375,
20.0948333333333, 20.7105416666667, 21.1350833333333, 21.613875,
22.1526666666667, 21.5527916666667, 20.9920833333333, 21.0596666666667,
21.3459166666667, 21.4136666666667, 21.7238333333333, 22.4411666666667,
22.0822916666667, 21.8911666666667, 21.5847916666667, 21.7676666666667,
22.0030833333333, 21.9430833333333, 22.4825416666667, 23.0897083333333,
23.5414583333333, 23.82, 24.0025833333333, 23.98625, 24.5325416666667,
23.6770416666667, 23.136, 23.3080416666667, 23.1600833333333,
22.7844583333333, 23.293375, 23.220125, 23.1320833333333, 23.0760833333333,
23.7297916666667, 24.1209166666667, 24.9144583333333, 25.6327083333333,
26.0205833333333, 26.126, 25.4773333333333, 25.1097916666667,
25.6192916666667, 25.8253333333333, 25.7808333333333, 25.137125,
25.0000416666667, 24.7380416666667, 24.4359166666667, 24.1663333333333,
24.110125, 23.913625, 24.0101666666667, 24.3034583333333, 24.6014583333333,
24.9077916666667, 25.0850833333333, 25.6234583333333, 25.9030416666667,
26.968125, 26.897875, 27.5782916666667, 27.9786666666667, 27.61375,
26.67575, 26.3882083333333, 26.0404166666667, 25.8984166666667,
26.2809583333333, 26.1988333333333, 26.3817916666667, 26.46325,
26.382, 26.0689166666667, 26.1216666666667, 26.6195, 26.859625,
27.0225833333333, 27.2107083333333, 27.810125, 28.0363333333333,
28.48275, 28.514375, 28.3447916666667, 28.0068333333333, 27.5096666666667,
27.5627083333333, 27.0342083333333, 27.6620416666667, 27.9710833333333,
27.953875, 27.936625, 28.21675, 28.439875, 29.0535833333333,
29.439625, 29.6357083333333, 29.4480833333333, 29.015625, 28.4232916666667,
28.386125, 28.8215416666667, 29.5276666666667, 29.602625, 30.4785416666667,
29.8260416666667, 29.632625, 29.2770416666667, 29.3150416666667,
29.6074583333333, 29.8493333333333, 29.778125), lgt = c(147.29012345679,
11.8685956790123, 13.272299382716, 39.9496913580247, 99.766049382716,
39.0691358024691, 25.5725308641975, 25.8137345679012, 8.34699074074074,
3.13125, 2.07638888888889, 2.64945987654321, 10.3154320987654,
15.0579475308642, 17.3253086419753, 5.01666666666667, 9.97484567901235,
4.50987654320988, 0.490046296296296, 10.1826388888889, 84.2596450617284,
24.7587191358025, 18.7372685185185, 0.431944444444444, 3.09814814814815,
2.27561728395062, 1.17939814814815, 2.3420524691358, 14.7756172839506,
5.03317901234568, 2.97345679012346, 10.9051697530864, 9.20262345679012,
5.7971450617284, 2.7158950617284, 1.27893518518519, 2.15941358024691,
1.35378086419753, 7.41674382716049, 15.3153549382716, 45.1071759259259,
8.91180555555556, 0.847145061728395, 2.97337962962963, 5.51496913580247,
6.80239197530864, 9.55956790123457, 19.1108796296296, 12.7988425925926,
100.853858024691, 103.503472222222, 1.48672839506173, 1.52824074074074,
7.30887345679012, 12.8653549382716, 8.03966049382716, 14.7921296296296,
9.89174382716049, 3.88703703703704, 3.20594135802469, 3.16442901234568,
6.14606481481481, 13.2888888888889, 10.797299382716, 93.5950617283951,
67.7730709876543, 140.919984567901, 58.2300925925926, 39.8996913580247,
34.9829475308642, 13.9283179012346, 5.21597222222222, 2.43371913580247,
4.75910493827161, 6.20424382716049, 5.37368827160494, 31.4861882716049,
8.70424382716049, 8.67098765432099, 5.43179012345679, 70.7297067901235,
58.6121141975309, 55.7964506172839, 86.9338734567901, 18.9948302469136,
19.418287037037, 9.50162037037037, 22.1922839506173, 25.4814043209877,
10.3818672839506, 54.5672067901235, 14.6177469135802, 38.5707561728395,
35.9960648148148, 6.78557098765432, 15.3651234567901, 11.5861111111111,
13.6625, 57.0010030864198, 73.3876543209877, 89.4920524691358,
124.690817901235, 29.2520833333333, 166.451080246914, 101.327237654321,
252.703935185185, 280.303240740741, 515.025694444444, 523.862731481481,
107.548225308642, 74.7662037037037, 264.074228395062, 16.6194444444444,
26.8019290123457, 116.119598765432, 38.2054012345679, 47.7068672839506,
53.346450617284, 17.9067901234568, 53.8114969135802, 60.1318672839506,
191.243055555556, 164.881172839506, 99.0101080246914, 78.977237654321,
163.701697530864, 139.366666666667, 107.556558641975, 4.65941358024691,
0, 0, 0, 0.00833333333333333, 0.00833333333333333, 0.0498456790123457,
0.0166666666666667, 0.00833333333333333, 0, 0, 0, 0, 0, 0.157793209876543,
62.9890432098765, 16.3951388888889, 33.5790895061728, 52.6984567901235,
50.3315586419753, 54.1604938271605, 66.2529320987654, 103.378703703704,
123.984799382716, 37.2088734567901, 6.13788580246914, 1.47013888888889,
28.2389660493827, 111.103240740741, 66.1949074074074)), row.names = c(NA,
-158L), class = c("tbl_df", "tbl", "data.frame"))
I wonder if there's any way I could do that?
Assuming your length-versus-time data.frame is named lvt and your temperature-versus-time data.frame is named tvt, we first convert the month factor in lvt to actual dates:
# lvt$month is assumed to be a factor whose integer codes run from
# 1 = January to 12 = December.
# Each month is placed on its 15th day so that it sits roughly mid-month on a
# Date axis. The year 2019 is hard-coded -- presumably chosen to match the
# dates in the temperature data; confirm.
lvt[["month2"]] <- as.Date(
  ISOdate(year = 2019, month = as.numeric(lvt$month), day = 15)
)
Then, once both x-axes are in Date format, we can plot the data:
# Each layer brings its own data: dodged bars from lvt (monthly means per site)
# and a line from tvt (temperature). Temperature is multiplied by 3 to share
# the primary y scale; the secondary axis divides by 3 to undo that.
ggplot() +
  geom_col(
    data = lvt,
    mapping = aes(x = month2, y = mean, fill = site),
    colour = "black",
    position = "dodge"
  ) +
  geom_line(data = tvt, mapping = aes(x = month, y = tmp * 3), size = 1) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 3, name = "Temperature"))
Which looks like the following:
Note that it is a bit of a weird plot, because you are essentially mixing ordered categorical data with continuous data. For example, tvt starts on January 31st, so it looks as if it actually starts in February. Furthermore, the distances between the groups of bars aren't constant because, for example, February is a shorter month.

Find records without a prerequisite record in the dataframe

I have a dataframe with 3 columns: Timestamp, MMR_NBR and Action
Action DFV must occur before SAP Load for every MMR_NBR. I want to extract the SAP Load records that are NOT preceded by a DFV action. I am using sqldf in R, and I know that it uses SQLite's SQL dialect, so window functions are limited. I managed to get the records, but I would like to know whether there is a simpler and better way to write this, either as a SQL query or with an R package such as dplyr.
Sample Data:
df5 <- structure(list(Timestamp = structure(c(7L, 8L, 9L, 10L, 11L,
1L, 2L, 3L, 4L, 5L, 6L, 12L, 13L, 16L, 17L, 18L, 14L, 15L, 19L,
20L), .Label = c("8/14/2018 11:22:18 AM", "8/14/2018 11:30:03 AM",
"8/14/2018 11:32:26 AM", "8/14/2018 4:03:27 PM", "8/14/2018 4:04:05 PM",
"8/14/2018 4:04:11 PM", "8/20/2018 4:02:00 PM", "8/20/2018 6:12:50 PM",
"8/21/2018 9:56:51 AM", "8/21/2018 9:56:59 AM", "8/22/2018 10:43:45 AM",
"8/22/2018 10:43:57 AM", "8/22/2018 4:34:53 PM", "8/23/2018 1:53:25 PM",
"8/23/2018 1:53:36 PM", "8/23/2018 11:47:15 AM", "8/23/2018 12:23:44 PM",
"8/23/2018 12:26:20 PM", "8/23/2018 2:38:59 PM", "8/23/2018 2:39:19 PM"
), class = "factor"), MMR_NBR = structure(c(12L, 10L, 2L, 2L,
8L, 11L, 5L, 5L, 7L, 7L, 7L, 8L, 9L, 3L, 4L, 4L, 1L, 1L, 6L,
6L), .Label = c("B00215", "B00216", "B00218", "B00219", "K00364",
"K00625", "K00632", "K00642", "K00646", "W00362", "W00364", "W00365"
), class = "factor"), Action = structure(c(1L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("DFV",
"SAP Load"), class = "factor")), .Names = c("Timestamp", "MMR_NBR",
"Action"), row.names = c(NA, 20L), class = "data.frame")
In the above sample data, the record 8/14/2018 11:22:18 AM W00364 SAP Load must be returned, along with similar records, as the result of the query.
R Script:
# Query 1: keep only the DFV / SAP Load rows that have a non-empty MMR_NBR,
# de-duplicated. The result overwrites df5.
sql <- "SELECT DISTINCT Timestamp, MMR_NBR, Action FROM df5 WHERE (Action='DFV' OR Action='SAP Load') AND MMR_NBR<>''"
df5 <- sqldf::sqldf(x = sql)
# Query 2: one row per MMR_NBR that occurs exactly once in the filtered data.
# NOTE(review): a single-row group is not by itself proof of a SAP Load without
# an earlier DFV -- confirm this matches the intended rule.
sql <- "SELECT MMR_NBR,Action, COUNT(*) FROM df5 GROUP BY MMR_NBR HAVING COUNT(*)=1"
df6 <- sqldf::sqldf(x = sql)
Using dplyr:
Step 1: turn Timestamp into an actual timestamp:
# Parse the month/day/year 12-hour-clock strings (with AM/PM) into POSIXct;
# as.character() first, because the column is stored as a factor.
df5[["Timestamp"]] <- as.POSIXct(
  as.character(df5$Timestamp),
  format = "%m/%d/%Y %I:%M:%S %p"
)
Step 2:
# library() fails loudly if dplyr is missing; require() would only return
# FALSE and let the pipeline below fail with a less helpful error.
library(dplyr)
# For every MMR_NBR, keep the "SAP Load" rows that occur before any "DFV" row.
df5 %>%
  group_by(MMR_NBR) %>%
  # Sort the whole table by time, so rows within each group are chronological.
  arrange(Timestamp) %>%
  # cumsum() is evaluated per group: it stays 0 until the first DFV is seen.
  filter(Action == "SAP Load" & cumsum(Action == "DFV") == 0)
Result:
# A tibble: 5 x 3
# Groups: MMR_NBR [4]
Timestamp MMR_NBR Action
<dttm> <fct> <fct>
1 2018-08-14 11:22:18 W00364 SAP Load
2 2018-08-14 11:30:03 K00364 SAP Load
3 2018-08-14 11:32:26 K00364 SAP Load
4 2018-08-22 16:34:53 K00646 SAP Load
5 2018-08-23 11:47:15 B00218 SAP Load

RODBC: Quotes around datetime-values for SQL Server APPENDS

Based on a previous question (here), I created a dataframe in R:
mdf <- structure(list(run = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("run_00",
"run_01", "run_02", "run_03", "run_04"), class = "factor"), slot = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("slot 3", "slot 4", "slot 5",
"slot 6"), class = "factor"), timestamp = structure(c(1320774563,
1320774624, 1320774686, 1320774747, 1320774809, 1320774871), class = c("POSIXct",
"POSIXt"), tzone = ""), channel = structure(c(1L, 1L, 1L, 1L,
1L, 1L), .Label = c("och01", "och02", "och09", "och10"), class = "factor"),
variable = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("num_blocks",
"num_collection", "num_corr_0", "num_corr_1", "num_uncorr_srow",
"post_fec_err_rate", "pre_fec_err_rate"), class = "factor"),
value = c(1, 62, 124, 185, 247, 309)), .Names = c("run",
"slot", "timestamp", "channel", "variable", "value"), row.names = c(NA,
6L), class = "data.frame")
> mdf
run slot timestamp channel variable value
1 run_00 slot 3 2011-11-08 12:49:23 och01 num_collection 1
2 run_00 slot 3 2011-11-08 12:50:24 och01 num_collection 62
3 run_00 slot 3 2011-11-08 12:51:26 och01 num_collection 124
4 run_00 slot 3 2011-11-08 12:52:27 och01 num_collection 185
5 run_00 slot 3 2011-11-08 12:53:29 och01 num_collection 247
6 run_00 slot 3 2011-11-08 12:54:31 och01 num_collection 309
Then I save these data to the database:
# Write the data frame mdf to the database table named "mdf" through the
# connection conout.
sqlSave(conout,mdf,tablename="mdf")
Now, I want to append the same data to the database:
# Append the same rows to the existing "mdf" table (append = TRUE), with
# fast = FALSE and verbose output switched on.
sqlSave(conout,mdf,tablename="mdf",fast=FALSE,append=TRUE, verbose=TRUE)
This results in the following error:
[RODBC] ERROR: Could not SQLExecDirect 'INSERT INTO "mdf" ( "rownames", "run", "slot", "timestamp", "channel", "variable", "value" ) VALUES ( '1', 'run_00', 'slot 3', 2011-11-08 17:49:23, 'och01', 'num_collection', 1 )'
I understand the cause of the error, because R should make quotes around the datetime value "timestamp" (i.e. '2011-11-08 17:49:23'). However, I do not know how to work around this error. Some advice?
Package Version: 1.3.15
EDIT:
If you refer to other questions, please be so kind as to explain how they can help me.

ggmosaic error message: default method not implemented for type 'list'

Trying to create a heatmap using ggmosaic, I keep getting the error Error in is.finite(x) : default method not implemented for type 'list'
Searching for that error message, I found one answer stating that "This error is because the is.infinite() and the is.finite() functions are not implemented with a method for data.frames." However, that answer offered no useful solution, and the question was not about ggmosaic.
Even the example from the vignette fails for me.
# Vignette example: mosaic plot of SleepHrsNight from NHANES, weighted by the
# Weight column and filled by the same variable; legend order is reversed.
ggplot(data = NHANES) +
  geom_mosaic(
    aes(
      weight = Weight,
      x = product(SleepHrsNight),
      fill = factor(SleepHrsNight)
    ),
    na.rm = TRUE
  ) +
  labs(x = "Hours of sleep a night ", title = "f(SleepHrsNight)") +
  guides(fill = guide_legend(title = "SleepHrsNight", reverse = TRUE))
My system is Windows, RStudio, R version one before the most current, and ggmosaic Ver 0.1.2
The made-up data frame I want to use is
structure(list(Diversity = structure(c(1L, 5L, 4L, 5L, 1L, 2L,
2L, 2L, 4L, 1L, 5L, 4L, 5L, 4L, 2L, 3L, 3L, 1L, 5L, 2L, 1L, 4L,
3L, 3L, 3L), .Label = c("AfricanAm", "Asian", "Cauc.", "Latino",
"Other"), class = "factor"), Office = structure(c(1L, 2L, 1L,
3L, 4L, 5L, 2L, 4L, 5L, 3L, 4L, 4L, 1L, 2L, 3L, 4L, 1L, 5L, 5L,
1L, 2L, 3L, 2L, 3L, 5L), .Label = c("Hamlet", "MainTown", "Metroprole",
"Smithville", "Urbanburg"), class = "factor"), JrAssoc = c(1,
1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 1, 1, 1, 2, 2, 3, 3, 3, 5, 5, 2,
5, 9, 10), SrAssPtr = c(2, 2, 1, 1, 3, 2, 1, 4, 4, 5, 1, 1, 3,
5, 7, 3, 2, 1, 1, 1, 1, 2, 3, 4, 4)), row.names = c(NA, -25L), .Names = c("Diversity",
"Office", "JrAssoc", "SrAssPtr"), class = c("tbl_df", "tbl",
"data.frame"))
This code has not succeeded:
# Mosaic plot of JrAssoc by SrAssPtr with unit weights, filled by Diversity.
ggplot(data = diverse) +
  geom_mosaic(
    mapping = aes(
      weight = 1,
      x = product(JrAssoc, SrAssPtr),
      fill = Diversity
    )
  )
Thank you for any guidance.
If you just update your ggplot2 library in this way
# Install ggplot2 from the GitHub repository cran/ggplot2 (needs the devtools
# package to be installed).
devtools::install_github('cran/ggplot2')
your problem should be solved.
As mentioned in my comments, the authors are trying to fix the issue.