Data Prep

Read in the data.

## Read in data and separate data #############
oilog <- read_excel(path="~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/DE_ID_LOG_20250529.xlsx", sheet="All Studies", col_types = "text")
relog <- read_excel(path="~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/DE_ID_LOG_20250529.xlsx", sheet="Raters")
dat <- read.csv(file="~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/SMABulbarPhysiologyF_DATA_2025-08-11_0850.csv")

#drop the extra columns in oiscores
oilog <- oilog %>% 
  select(`Subject ID`, starts_with("Exam")) %>% 
  filter(`Subject ID` != "View the pilot log for all of those studies") 

#remove the two that were rated twice (MAS_19   S R (330); MAS_20   S (326))
torm <- c(which(oilog$`Subject ID` == "MAS_19" & oilog$Exam1_Analysis_Code == "S R (330)"), 
          which(oilog$`Subject ID` == "MAS_20" & oilog$Exam1_Analysis_Code == "S (326)")
)

oilog <- oilog[-torm,]

#drop extra columns in relog
relog <- relog[,1:4]

#fix participant ID without caps
dat$studyid[which(dat$studyid == "lurie_25")] <- "Lurie_25"

#fix participant ID with dot in name
dat$studyid[which(dat$studyid == "Lurie_29.")] <- "Lurie_29"

Data includes information for both the analysis data as well as the patient data in the same sheet. Separate out into separate files.

chartreview <- dat %>%
  filter(redcap_event_name == "chart_review_arm_1") %>%
  select(studyid:vfss_analysis_complete) 


analysis <- dat %>% 
  filter(redcap_event_name == "vfss_analysis_arm_2") %>% 
  select(studyid, redcap_event_name, rater:vfss_analysis_complete)

Data Cleaning

Check overlap participant IDs in Log and Chart review arm

#which ids are in the chartreview data and not in the log?
summary(chartreview$studyid %in% oilog$`Subject ID`) #62 do not match

#chartreview$studyid[which(chartreview$studyid %in% oilog$`Subject ID` == FALSE)]

#which ids are in the log but not chart review?
summary(oilog$`Subject ID` %in% chartreview$studyid) #37 do not match

#oilog$`Subject ID`[which(oilog$`Subject ID` %in% chartreview$studyid==FALSE)]

#Appears to be just inconsistent typing - update log
oilog$`Subject ID`[grep("BRAZ_", oilog$`Subject ID`)] <- gsub("BRAZ_", "BRAZIL_", oilog$`Subject ID`[grep("BRAZ_", oilog$`Subject ID`)])

#add leading 0s to PRI and CHOP
oilog$`Subject ID`[grep("PRI_[[:digit:]]$", oilog$`Subject ID`)] <- gsub("PRI_", "PRI_0", oilog$`Subject ID`[grep("PRI_[[:digit:]]$", oilog$`Subject ID`)])

oilog$`Subject ID`[grep("CHOP_[[:digit:]]$", oilog$`Subject ID`)] <- gsub("CHOP_", "CHOP_0", oilog$`Subject ID`[grep("CHOP_[[:digit:]]$", oilog$`Subject ID`)])

#remove leading 0s from SHC
oilog$`Subject ID`[grep("SHC_0[[:digit:]]$", oilog$`Subject ID`)] <- gsub("SHC_0", "SHC_", oilog$`Subject ID`[grep("SHC_0[[:digit:]]$", oilog$`Subject ID`)])

#remove underscore from FL
oilog$`Subject ID`[grep("FL_", oilog$`Subject ID`)] <- gsub("_", "", oilog$`Subject ID`[grep("FL_", oilog$`Subject ID`)])

#check matches again
#which ids are in the chartreview data and not in the log?
summary(chartreview$studyid %in% oilog$`Subject ID`) #25 do not match

chartreview$studyid[which(chartreview$studyid %in% oilog$`Subject ID` == FALSE)]

#which ids are in the log but not chart review?
summary(oilog$`Subject ID` %in% chartreview$studyid) #all match

Add participant ID to data to know which studies were the same person

oilog1 <- oilog %>% 
  pivot_longer(cols = starts_with("Exam"), 
               names_to = "Exam", 
               values_to = "ID",
               values_drop_na = TRUE) %>% 
  mutate(studyid_clean = as.numeric(gsub("[[:punct:]]", "", gsub("[[:alpha:]]+", "", ID)))) %>% 
  #remove duplicate study ids (multiple raters) as these were the same person
  filter(!duplicated(studyid_clean)) 

#write out clean participant log
#write.csv(oilog1, file="../data/Clean_ID_log.csv", row.names = F)
  
#remove initials from id
analysis$studyid_clean <- as.numeric(gsub("[[:alpha:]]+", "", analysis$studyid))

#remove numbers from rater
analysis$rater_clean <- trimws(gsub("[[:digit:]]+", "",  analysis$rater))

#insert rater initials into blank one - use initials from studyid
analysis$rater_clean[which(analysis$rater_clean=="")] <- trimws(gsub("[[:digit:]]+", "",  analysis$studyid))[which(analysis$rater_clean=="")]

Make sure study ids in the data match the study ids in the log

#any log study ids not in the data?
summary(oilog1$studyid_clean %in% analysis$studyid_clean) #1
oilog1[which(oilog1$studyid_clean %in% analysis$studyid_clean==FALSE),] #Subject TX_08 - video not available


#data ids not in the log?
summary(analysis$studyid_clean %in% oilog1$studyid_clean) # all match now!

# #which study ids do not match a subject id?
# unique(analysis$studyid_clean[which(analysis$studyid_clean %in% oilog1$studyid_clean==FALSE)]) #11  16  16  16  23  29 279

Look at incomplete data - there is one case where vfss_analysis_complete is 0

analysis %>% 
  filter(vfss_analysis_complete == 0) %>% 
  select(studyid, rater, studyid_clean, rater_clean, vfss_analysis_complete) %>% 
  kable()

Merge log with analysis data

#for now, merge, keeping only those with a known subject ID
#add participant id to analysis data, remove NA ids (not complete) - one row less
analysis1 <- analysis %>% 
  filter(!is.na(studyid_clean)) %>% 
  inner_join(oilog1, by="studyid_clean")

Note 2025-07-29: BCH_13 is a duplicate with BCH_22. Need to remove BCH_13.

analysis1 <- analysis1 %>% 
  filter(`Subject ID` != "BCH_13")

chartreview <- chartreview %>% 
  filter(studyid != "BCH_13")

Note 2025-08-08: NZ_04 was removed from natural history paper and should be removed here

analysis1 <- analysis1 %>% 
  filter(`Subject ID` != "NZ_04")

chartreview <- chartreview %>% 
  filter(studyid != "NZ_04")

Gather relevant Ns

nparts <- length(unique(analysis1$`Subject ID`))

#ns for reliability vs analysis
relib_ids <- analysis1$studyid_clean[grep("con", analysis1$rater_clean, ignore.case=T)]

Participant Summary

There were a total of 177 participants. Look at participant by number of exams

analysis1 %>% 
  filter(grepl("con", rater_clean, ignore.case=T) | studyid %in% relib_ids == FALSE) %>% 
  group_by(`Subject ID`, studyid_clean) %>% 
  summarize(count=n()) %>% 
  group_by(`Subject ID`) %>% 
  summarize("N Studies" = n()) %>% 
  arrange(desc(`N Studies`)) %>% 
  kable()

Check accuracy

Use all data for this, including the reliability rated videos.

Bolus Location (_5)

If 5_bolus location at initiation of pharyngeal swallow =3 then

bolus3 <- analysis1 %>% 
  filter(t_5 == 3 | n_5 == 3 | h_5 == 3 | p_5 == 3)

6_ timing = 2

Flag any cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_6 != 2 ~ 1, 
                          n_5 == 3 & n_6 != 2 ~ 1, 
                          h_5 == 3 & h_6 != 2 ~ 1, 
                          p_5 == 3 & p_6 != 2 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean,ends_with("_5"), ends_with("_6")) %>% 
  select(studyid_clean, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>% 
  arrange(studyid_clean) %>% 
  kable()

10_late laryngeal closure =3

Flag cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_10 != 3 ~ 1, 
                          n_5 == 3 & n_10 != 3 ~ 1, 
                          h_5 == 3 & h_10 != 3 ~ 1, 
                          p_5 == 3 & p_10 != 3 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean, rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

#code for checking individual ratings: 
# filter(analysis1, studyid_clean == 24) %>% select(studyid_clean, rater, ends_with("_5"), ends_with("_10"))

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean, rater, ends_with("_5"), ends_with("_10")) %>% 
   select(studyid_clean,rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

16_epiglottic =1 or 2

Flag cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_16 == 0 ~ 1, 
                          n_5 == 3 & n_16 == 0 ~ 1, 
                          h_5 == 3 & h_16 == 0 ~ 1, 
                          p_5 == 3 & p_16 == 0 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean, rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean,rater, ends_with("_5"), ends_with("_16")) %>% 
   select(studyid_clean, rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

17_tongue base 2, 3 or 4 Flag cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_17 < 2 ~ 1, 
                          n_5 == 3 & n_17 < 2 ~ 1, 
                          h_5 == 3 & h_17 < 2 ~ 1, 
                          p_5 == 3 & p_17 < 2 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean,rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean, rater, ends_with("_5"), ends_with("_17")) %>% 
   select(studyid_clean, rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

18_pharyngeal stripping wave= 1, or 2 Flag cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_18 == 0 ~ 1, 
                          n_5 == 3 & n_18 == 0 ~ 1, 
                          h_5 == 3 & h_18 == 0 ~ 1, 
                          p_5 == 3 & p_18 == 0 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean, rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean, rater, ends_with("_5"), ends_with("_18")) %>%
   select(studyid_clean, rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

20_residue = 3 or 4 Flag cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_20 < 3 ~ 1, 
                          n_5 == 3 & n_20 < 3 ~ 1, 
                          h_5 == 3 & h_20 < 3 ~ 1, 
                          p_5 == 3 & p_20 < 3 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean,rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean,rater,ends_with("_5"), ends_with("_20")) %>% 
   select(studyid_clean,rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

21_PES 2 or 3 Flag cases that don’t follow this pattern:

bolus3 %>% 
  mutate(flag = case_when(t_5 == 3 & t_21 < 2 ~ 1, 
                          n_5 == 3 & n_21 < 2 ~ 1, 
                          h_5 == 3 & h_21 < 2 ~ 1, 
                          p_5 == 3 & p_21 < 2 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean,rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _5 == 3

bolus3 %>% 
  select(studyid_clean,rater,ends_with("_5"), ends_with("_21")) %>% 
   select(studyid_clean,rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

PES (_21)

If 21_PES = 2 or 3, then 20_residue = 3 or 4

pes21 <- analysis1 %>% 
  filter(t_21 >=2 | n_21 >= 2 | h_21 >= 2 | p_21 >= 2)

Flag cases that don’t follow this pattern:

pes21 %>% 
  mutate(flag = case_when(t_21 >=2 & t_20 < 3 ~ 1, 
                          n_21 >=2 & n_20 < 3 ~ 1, 
                          h_21 >=2 & h_20 < 3 ~ 1, 
                          p_21 >=2 & p_20 < 3 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean,rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _21 >=2

pes21 %>% 
  select(studyid_clean,rater, ends_with("21"), ends_with("_20")) %>% 
   select(studyid_clean,rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable

Tongue Base (_17)

If 17_tongue base = 3 or 4

tonguebase34 <- analysis1 %>% 
  filter(t_17 >=3 | n_17 >= 3 | h_17 >= 3 | p_17 >= 3)

16_epiglottic = 1 or 2 Flag cases that don’t follow this pattern:

tonguebase34 %>% 
  mutate(flag = case_when(t_17 >=3 & t_16 == 0 ~ 1, 
                          n_17 >=3 & n_16 == 0 ~ 1, 
                          h_17 >=3 & h_16 == 0 ~ 1, 
                          p_17 >=3 & p_16 == 0 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean,rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _17 = 3 or 4

tonguebase34 %>% 
  select(studyid_clean, rater,ends_with("_17"), ends_with("_16")) %>% 
  select(studyid_clean,rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

19_valleculae =2, 3, 4 Flag cases that don’t follow this pattern:

tonguebase34 %>% 
  mutate(flag = case_when(t_17 >=3 & t_19 < 2 ~ 1, 
                          n_17 >=3 & n_19 < 2 ~ 1, 
                          h_17 >=3 & h_19 < 2 ~ 1, 
                          p_17 >=3 & p_19 < 2 ~ 1, 
                          TRUE ~ 0)) %>% 
  select(studyid_clean,rater, flag) %>% 
  arrange(studyid_clean) %>% 
  filter(flag == 1)

Examine all data where _17 = 3 or 4

tonguebase34 %>% 
  select(studyid_clean,rater, ends_with("_17"), ends_with("_19")) %>% 
   select(studyid_clean,rater, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

Check Age of Symptom onset

Look at Age of symptom onset that are non-numeric

chartreview %>% 
  group_by(sympoms) %>% 
  summarize(N=n()) %>% 
  kable(digits = 0)

#check case
chartreview %>% 
  filter(sympoms == "Parents reported patient was strong at initial neuro visit (comparing to a sibling with SMA)") %>% 
  select(contains("ID"))

chartreview %>% 
  select(studyid, sympoms) %>% 
  filter(grepl("7|8|9|Information not found", sympoms)) %>% 
  kable()

Clean the Age of symptom onset question

#save original as new variable
chartreview$sympoms_original <- chartreview$sympoms

#edit ages entered as text
textages <- which(is.na(as.numeric(chartreview$sympoms)))
agestofix <- chartreview$sympoms[textages]

#if there are "mos" or "months" text, remove
agestofix <- gsub("mos|months", "", agestofix)

#birth 2/2 familial mutation --> asymptomatic
agestofix[which(agestofix == "birth 2/2 familial mutation")] <- "asymptomatic"

#asymptomatic --> NA
agestofix[grepl("asymp", agestofix, ignore.case=T)] <- NA

#convert weeks to months - multiply weeks by 0.23 
agestofix[grepl("weeks", agestofix)] <- ifelse(!is.na(as.numeric(gsub(" weeks", "", agestofix[grepl("weeks", agestofix)]))), 
                                        as.numeric(gsub(" weeks", "", agestofix[grepl("weeks", agestofix)]))*.23, agestofix[grepl("weeks", agestofix)])

#multiply days by 0.032855
agestofix[which(agestofix=="37 month, 11 days")] <- 37 + 11*0.032855


#specific changes
agestofix[which(agestofix==".01  (placed as .1 but actually corrected for PMA is 0)")] <- .01
agestofix[which(agestofix==".01 (actually 0 corrected for pma but rob said put as included)")] <- .01
agestofix[which(agestofix=="Significant global hypotonia at birth. Screen performed at DOL 5.")] <- 0
agestofix[which(agestofix == "0.01 (said since birth but not requiring invasive things)")] <- 0.01
agestofix[which(agestofix == "12 *Include didn't achieve sitting by then")] <- 12

agestofix[which(agestofix=="Unknown--earliest record shows 6 ")] <- 6
agestofix[which(agestofix=="between 3-12  -- adopted and reportedly 'fine' at 3  and when moved to foster care was severely weak at 12 ; details between 3-12  are unclear")] <- 12
agestofix[which(agestofix=="1 month")] <- 1
agestofix[which(agestofix == "8  referred to PT for hypotonia the timing of referral to PT was related to her being shifted into foster care. At that time she hadn't achieved any motor milestones, her Bayley-3 scaled score was a 1 for gross motor skills. We expect she likely had much earlier weakness but unfortunately nothing was documented due to lack of routine follow up by bio mom.   She achieved independent sitting (could prop sit about 1 minute) around 2 years of age in 2018.")] <- 8
agestofix[which(agestofix=="So at the time of initial referral to PT (8 mths) patient was being shifted into foster care. At that time she hadn't achieved any motor milestones, her Bayley-3 scaled score was a 1 for gross motor skills. We expect she likely had much earlier weakness but unfortunately nothing was documented due to lack of routine follow up by bio mom.   She received nusinersen at 1 year and achieved independent sitting (could prop sit about 1 minute) around 2 years of age in 2018. Basil and Graham said type 1 to include")] <- 8

#agestofix[which(agestofix=="Parents reported patient was strong at initial neuro visit (comparing to a sibling with SMA)Per neuro parents were adamant she was stronger than sister at 6 . So they recognized she was maybe weaker than typical but didn't feel there was a problem  Agreed to testing. And that's our first encounter with her. They said she was eating well but also reported bottle propping. Feeding issues onset was reported at 7  that they report being due to a sore throat. Never achieved sitting. Include. ")] <- 7
agestofix[which(agestofix == "14  admitted with severe hypotonia. Never sat. SMA 1")] <- 14

agestofix[which(agestofix == "Parents reported patient was strong at initial neuro visit (comparing to a sibling with SMA)Per neuro parents were adamant she was stronger than sister at 6 . So they recognized she was maybe weaker than typical but didn't feel there was a problem  Agreed to testing. And that's our first encounter with her. They said she was eating well but also reported bottle propping. Feeding issues onset was reported at 7  that they report being due to a sore throat. Never achieved sitting. Include. ")] <- 7
agestofix[which(agestofix == "0.01 (actually 0 but include per graham)")] <- 0.01
agestofix[which(agestofix == "0 include per MD based on not sitting")] <- 0
agestofix[which(agestofix == "0 over ride and include bc never achieved sitting")] <- 0
agestofix[which(agestofix==".43   From our conversations I gather that on his initial assessment neuro exam was WNL, but on the day of treatment he had lost all(?) reflexes? Correct.  11/15/19 was initial consult date and normal.  By first LP day (11/22/19), here is what I found: \"Trace reflexes are appreciated in the upper extremities and at the ankles, but unfortunately I cannot obtain them in the knees despite multiple attempts.  Otto was somewhat tired when I examined him, as this was after his lumbar puncture, and I did appreciate clear antigravity movements of the foot and at the knee.  I did not appreciate full hip flexion today.  Tone is normal.  Examination of his tongue reveals no fasciculations, and thrush.  Please note that I looked at his tongue multiple times to be sure of the fasciculation question.\"  Basically, mild hip flexion weakness and areflexia.     FYI, he progressed after that.  By LP day #2 (12/06/2019), he had these findings: \"Limited examination today shows him to be awake with no facial concerns.  His biceps reflexes are elicited, but otherwise they are absent.  He has a wrist drop on the left, but can maintain it against gravity to the level of the horizon, whereas on the right he actually can still bring it up with antigravity, although is quite limited.  Antigravity strength at the biceps still appreciated, and less than 50% antigravity movement is appreciated at the knee.\"")] <- 0.43

#make NA for now
# agestofix[which(agestofix=="Information not found")] <- NA
# agestofix[which(agestofix=="Parents reported patient was strong at initial neuro visit (comparing to a sibling with SMA)")] <- NA #but should include as Type 1
agestofix[which(agestofix=="5 do")] <- 5*0.032855

#ranges as mid-point for now
agestofix[which(agestofix=="4-5 ")] <- 4.5
agestofix[which(agestofix=="2-6 weeks")] <- 4*0.23 
agestofix[which(agestofix=="4-8 weeks")] <- 6*0.23


#add back into data
chartreview$sympoms[textages] <- agestofix

#remove the ages for the two cases (Lurie_3 and Lurie_7) who were included as overrides as the age calculations/ranges given those aren't accurate based on the circumstances the clinicians indicated
#2024-10-10 commenting this out for the treated before/after symptoms as ages were corrected
#chartreview$sympoms[which(chartreview$studyid == "Lurie_3")] <- NA
#chartreview$sympoms[which(chartreview$studyid == "Lurie_7")] <- NA

Check and clean age variables for each study

#edit ages entered as text
textages <- which(is.na(as.numeric(chartreview$age_mbs_1)))
agestofix <- chartreview$age_mbs_1[textages]
agestofix[which(agestofix == "37w5d")] <- 37*0.23 + 5*0.0328767
agestofix[which(agestofix == "6 mos 26 days")] <- 6 + 26*0.0328767
agestofix[which(agestofix == "5 months 26 days")] <- 5 + 26*0.0328767
agestofix[which(agestofix == "22m ")] <- 22
agestofix[which(agestofix == "21 mos")] <- 21
agestofix[which(agestofix == "38 mo")] <- 38

# #manually convert, days * 0.0328767
# agestofix1 <- strsplit(agestofix, "months|mos")
# fixedages <- unlist(lapply(agestofix1, function(x) as.numeric(x[1]) + as.numeric(gsub("day|days", "", x[2]))*0.0328767))

#add back in
chartreview$age_mbs_1[textages] <- agestofix

chartreview %>% 
  mutate(age_mbs_1 = as.numeric(age_mbs_1)) %>% 
  filter(age_mbs_1 > 20) %>% 
  select(studyid, starts_with("age_mbs")) %>% 
  arrange(age_mbs_1) %>% 
  kable()

Check OI_9 scores

Look across all consistencies for now to see if anyone as a 0 for _9

checkoi9 <- analysis1 %>% 
  filter(t_9 == 0 | n_9 == 0 | h_9 == 0 | p_9 == 0) %>% 
  rowwise() %>% 
  mutate(oi_9 = max(c(t_9, n_9, h_9, p_9), na.rm=T))

checkoi9 %>% 
  filter(oi_9 == 0) %>% 
  select(studyid_clean, oi_9, ends_with("_9"), ends_with("_10")) %>% 
   select(studyid_clean, oi_9, starts_with("t"), starts_with("n"), starts_with("h"), starts_with("p")) %>%
  arrange(studyid_clean) %>% 
  kable()

# filter(analysis1, studyid_clean == 259) %>% select(studyid_clean, rater, ends_with("_9"), ends_with("_10"))

Demographic Metadata

Add metadata labels to the data

chartreview <- chartreview %>% 
  mutate(sexc = case_when(sex == 1 ~ "Male", 
                          sex == 2 ~ "Female"), 
         racec = case_when(race == 1 ~ "White",
                           race == 2 ~ "Black / African-American", 
                           race == 3 ~  "Asian", 
                           race == 4 ~ "Native American / Alaskan Native", 
                           race == 5 ~  "Native Hawaiian / Other Pacific Islander", 
                           race == 6 ~  "Other",
                           race == 7 ~ "Unknown/Not Available"), 
         ethnicityc = case_when(ethnicity == 1 ~ "Hispanic or Latino", 
                                ethnicity == 2 ~ "Not Hispanic or Latino", 
                                ethnicity == 3 ~ "Unknown/Not Available"), 
         genetic_testingc = case_when(genetic_testing == 1 ~ "Yes", 
                                      genetic_testing == 0 ~ "No"), 
         smn2c = case_when(smn2 == 1 ~ "1", 
                           smn2 == 2 ~ "2", 
                           smn2 == 3 ~ "3", 
                           smn2 == 4 ~ "Unknown"), 
         screenc = case_when(screen == 1 ~ "Yes", 
                             screen == 0 ~ "No"), 
         referral_mbs1c = case_when(referral_mbs1 == 0 ~ "Asymptomatic, Standard High Risk Referral at Diagnosis", 
                                    referral_mbs1 == 1 ~ "Concerns that swallowing may be contributing to nutritional or respiratory impairments given high risk", 
                                    referral_mbs1 == 2 ~ "Caregiver reported symptoms", 
                                    referral_mbs1 == 3 ~ "Follow-Up from previous impairment"), 
         referral_mbs2c = case_when(referral_mbs2 == 0 ~ "Asymptomatic, Standard High Risk Referral at Diagnosis", 
                                    referral_mbs2 == 1 ~ "Concerns that swallowing may be contributing to nutritional or respiratory impairments given high risk", 
                                    referral_mbs2 == 2 ~ "Caregiver reported symptoms", 
                                    referral_mbs2 == 3 ~ "Follow-Up from previous impairment"), 
         referral_mbs3c = case_when(referral_mbs3 == 0 ~ "Asymptomatic, Standard High Risk Referral at Diagnosis", 
                                    referral_mbs3 == 1 ~ "Concerns that swallowing may be contributing to nutritional or respiratory impairments given high risk", 
                                    referral_mbs3 == 2 ~ "Caregiver reported symptoms", 
                                    referral_mbs3 == 3 ~ "Follow-Up from previous impairment"), 
         referral_mbs4c = case_when(referral_mbs4 == 0 ~ "Asymptomatic, Standard High Risk Referral at Diagnosis", 
                                    referral_mbs4 == 1 ~ "Concerns that swallowing may be contributing to nutritional or respiratory impairments given high risk", 
                                    referral_mbs4 == 2 ~ "Caregiver reported symptoms", 
                                    referral_mbs4 == 3 ~ "Follow-Up from previous impairment"), 
         referral_mbs5c = case_when(referral_mbs5 == 0 ~ "Asymptomatic, Standard High Risk Referral at Diagnosis", 
                                    referral_mbs5 == 1 ~ "Concerns that swallowing may be contributing to nutritional or respiratory impairments given high risk", 
                                    referral_mbs5 == 2 ~ "Caregiver reported symptoms", 
                                    referral_mbs5 == 3 ~ "Follow-Up from previous impairment"))

Calculate Overall Impression (OI) Scores

OI scores are calculated within each component as the worst score across consistencies for each case.

First step is to replace the scores corresponding to “Could not visualize” with NA so they are treated as missing.

#t_1, n_1, h_1 = 3
analysis1[,c("t_1", "n_1", "h_1")][analysis1[,c("t_1", "n_1", "h_1")]==3] <- NA

#t_2, n_2, h_2 = 8
analysis1[,c("t_2", "n_2", "h_2")][analysis1[,c("t_2", "n_2", "h_2")]==8] <- NA

#t_3, h_3 = 3 *NOTE: n_3 did not have this option in the codebook, guessing these are already NA
analysis1[,c("t_3", "n_3", "h_3")][analysis1[,c("t_3", "n_3", "h_3")]==3] <- NA

#t_4, n_4, h_4 = 3
analysis1[,c("t_4", "n_4", "h_4")][analysis1[,c("t_4", "n_4", "h_4")]==3] <- NA

Update the scores to reflect kids >12 months should not have a score for sucking components (OI_1-3) because they developmentally shouldn’t be sucking. I think what happened is the raters selected the worst score (2) for that if they didn’t give them a bottle and instead syringed it. So, for any kid >12 mths at the time of their swallow study, mark their score as NA rather than the worst score. We will use 13 as the cut off to include those with exams at 12 and change

#find study ids where ages were greater than 12 months
exam1over12m <- chartreview$studyid[which(as.numeric(chartreview$age_mbs_1) > 13)]
exam2over12m <- chartreview$studyid[which(as.numeric(chartreview$age_mbs_2) > 13)]
exam3over12m <- chartreview$studyid[which(as.numeric(chartreview$age_mbs_3) > 13)]
exam4over12m <- chartreview$studyid[which(as.numeric(chartreview$age_mbs_4) > 13)]
exam5over12m <- chartreview$studyid[which(as.numeric(chartreview$age_mbs_5) > 13)]

#t_1, n_1, h_1 for each exam
analysis1[which(analysis1$Exam == "Exam1_Analysis_Code" & analysis1$`Subject ID` %in% exam1over12m),c("t_1", "n_1", "h_1")] <- NA
analysis1[which(analysis1$Exam == "Exam2_Analysis_Code" & analysis1$`Subject ID` %in% exam2over12m),c("t_1", "n_1", "h_1")] <- NA
analysis1[which(analysis1$Exam == "Exam3_Analysis_Code" & analysis1$`Subject ID` %in% exam3over12m),c("t_1", "n_1", "h_1")] <- NA
analysis1[which(analysis1$Exam == "Exam4_Analysis_Code" & analysis1$`Subject ID` %in% exam4over12m),c("t_1", "n_1", "h_1")] <- NA
analysis1[which(analysis1$Exam == "Exam5_Analysis_Code" & analysis1$`Subject ID` %in% exam5over12m),c("t_1", "n_1", "h_1")] <- NA

#t_2, n_2, h_2 for each exam
analysis1[which(analysis1$Exam == "Exam1_Analysis_Code" & analysis1$`Subject ID` %in% exam1over12m),c("t_2", "n_2", "h_2")] <- NA
analysis1[which(analysis1$Exam == "Exam2_Analysis_Code" & analysis1$`Subject ID` %in% exam2over12m),c("t_2", "n_2", "h_2")] <- NA
analysis1[which(analysis1$Exam == "Exam3_Analysis_Code" & analysis1$`Subject ID` %in% exam3over12m),c("t_2", "n_2", "h_2")] <- NA
analysis1[which(analysis1$Exam == "Exam4_Analysis_Code" & analysis1$`Subject ID` %in% exam4over12m),c("t_2", "n_2", "h_2")] <- NA
analysis1[which(analysis1$Exam == "Exam5_Analysis_Code" & analysis1$`Subject ID` %in% exam5over12m),c("t_2", "n_2", "h_2")] <- NA

#t_3, h_3 for each exam
analysis1[which(analysis1$Exam == "Exam1_Analysis_Code" & analysis1$`Subject ID` %in% exam1over12m),c("t_3", "n_3", "h_3")] <- NA
analysis1[which(analysis1$Exam == "Exam2_Analysis_Code" & analysis1$`Subject ID` %in% exam2over12m),c("t_3", "n_3", "h_3")] <- NA
analysis1[which(analysis1$Exam == "Exam3_Analysis_Code" & analysis1$`Subject ID` %in% exam3over12m),c("t_3", "n_3", "h_3")] <- NA
analysis1[which(analysis1$Exam == "Exam4_Analysis_Code" & analysis1$`Subject ID` %in% exam4over12m),c("t_3", "n_3", "h_3")] <- NA
analysis1[which(analysis1$Exam == "Exam5_Analysis_Code" & analysis1$`Subject ID` %in% exam5over12m),c("t_3", "n_3", "h_3")] <- NA

Then create the OI scores.

#add columns to hold these scores
analysis1[,paste("oi", 1:21, sep="_")] <- NA

#take maximum across the relevant consistencies, place it into associated oi_x column.  
#Note: the ones that are all NA will return "inf" with a warning
for (i in 1:21){
  analysis1[,paste("oi", i, sep="_")] <- 
    apply(analysis1[,grep(paste0("t_", i, "$|",
                                       "n_", i, "$|", 
                                       "h_", i, "$|",
                                       "p_", i, "$"), 
                                names(analysis1))], #selects consistency columns for current component
          1, #looks across rows
          max, na.rm=T) #takes MAXIMUM
}


#replace any INF or -INF with NA
analysis1[analysis1=="Inf"] <- NA
analysis1[analysis1=="-Inf"] <- NA

Reshape exam-specific data to long format

#add dummy variable to make 5 options for another_mbs
chartreview$another_mbs5 <- NA

varlist <- list(grep("pharma_treatment", names(chartreview), value=T),
                grep("age_mbs", names(chartreview), value=T), 
                grep("treat_.*_1", names(chartreview), value=T), 
                 grep("treat_.*_2", names(chartreview), value=T), 
                 grep("treat_.*_3", names(chartreview), value=T), 
                 grep("treat_.*_4", names(chartreview), value=T), 
                 grep("treat_.*_5", names(chartreview), value=T),
                 grep("resp_cur_.*_1", names(chartreview), value=T),
                grep("resp_cur_.*_2", names(chartreview), value=T),
                grep("resp_cur_.*_3", names(chartreview), value=T),
                grep("resp_cur_.*_4", names(chartreview), value=T),
                grep("resp_cur_.*_5", names(chartreview), value=T),
                grep("resp_cur_.*_6", names(chartreview), value=T),
                grep("resp_cur_.*_7", names(chartreview), value=T),
                grep("secretions_mbs.*_1", names(chartreview), value=T),
                 grep("secretions_mbs.*_2", names(chartreview), value=T),
                grep("caregiver_symptoms.*_0", names(chartreview), value=T),
                grep("caregiver_symptoms.*_1", names(chartreview), value=T),
                grep("caregiver_symptoms.*_2", names(chartreview), value=T),
                grep("caregiver_symptoms.*_3", names(chartreview), value=T),
                grep("caregiver_symptoms.*_4", names(chartreview), value=T),
                grep("caregiver_symptoms.*_5", names(chartreview), value=T),
                grep("caregiver_symptoms.*_6", names(chartreview), value=T),
                grep("caregiver_symptoms.*_7", names(chartreview), value=T),
                grep("caregiver_symptoms.*_8", names(chartreview), value=T),
                grep("comments_[[:digit:]]", names(chartreview), value=T),
                 grep("fois_cur.*", names(chartreview), value=T),
                 grep("^hospitalized_mbs.*", names(chartreview), value=T), 
                   grep("weeks_hospitalized", names(chartreview), value=T),
                grep("resp_past_.*_1", names(chartreview), value=T),
                 grep("resp_past_.*_2", names(chartreview), value=T),
                 grep("resp_past_.*_3", names(chartreview), value=T),
                 grep("resp_past_.*_4", names(chartreview), value=T),
                 grep("resp_past_.*_5", names(chartreview), value=T),
                 grep("resp_past_.*_6", names(chartreview), value=T),
                 grep("resp_past_.*_7", names(chartreview), value=T),
                 grep("secretions_past_.*_1", names(chartreview), value=T),
                 grep("secretions_past_.*_2", names(chartreview), value=T),
                 grep("fois_past_.*", names(chartreview), value=T),
                 grep("referral_mbs[[:digit:]]$", names(chartreview), value=T),
                grep("referral_mbs[[:digit:]]c", names(chartreview), value=T),
                grep("presentations_mbs.*_1", names(chartreview), value=T),
                grep("presentations_mbs.*_2", names(chartreview), value=T),
                grep("presentations_mbs.*_3", names(chartreview), value=T),
                grep("presentations_mbs.*_4", names(chartreview), value=T),
                grep("presentations_mbs.*_5", names(chartreview), value=T),
                grep("thin_utensil_mbs*", names(chartreview), value=T),
                grep("nectar_utensil_mbs*", names(chartreview), value=T),
                grep("honey_utensil_mbs*", names(chartreview), value=T), 
                grep("another_mbs", names(chartreview), value=T))

chartreview_forlong <- chartreview %>% 
  filter(is.na(redcap_repeat_instance)) %>% 
  select(studyid, all_of(unlist(varlist))) 
              
chartreview_long <- reshape(chartreview_forlong, 
                                        direction = "long", 
                                        varying = varlist, 
                                        timevar = "visit")

names(chartreview_long) <- gsub("_mbs1|mbs_1", "", names(chartreview_long))

Check ages again in long format

textages <- which(is.na(as.numeric(chartreview_long$age_)))
agestofix <- chartreview_long$age_[textages]
agestofix[which(agestofix == "12 mos 1 day")] <- 12 + 1*0.0328767
agestofix[which(agestofix == "8 months 18 days")] <- 8 + 18*0.0328767
agestofix[which(agestofix == "41w3d")] <- 41*0.23 + 3*0.0328767
agestofix[which(agestofix == "27 months 5 days")] <- 17 + 5*0.0328767
agestofix[which(agestofix == "38 mo")] <- 38


chartreview_long$age_[textages] <- agestofix

Check and fix treatment age in chartreview

chartreview$treatment_age_c <- chartreview$treatment_age
toclean <- which(is.na(as.numeric(chartreview$treatment_age_c)))
agestofix <- chartreview$treatment_age_c[toclean]

agestofix[which(agestofix == "4 (spinraza) 23 (zolgensma)")] <- 4
agestofix[which(agestofix == "Zolgensma 0.76, Spinraza 1.71")] <- 0.76
agestofix[which(agestofix == "6.4 (Z) 26.7 (R)")] <- 6.4
agestofix[which(agestofix == "14 yrs")] <- 12*14
agestofix[which(agestofix == "100 mos")] <- 100
agestofix[which(agestofix == "8.11- zolgemsma; 22.97- risdipam")] <- 8.11
agestofix[which(agestofix == "7.5 mths")] <- 7.5
agestofix[which(agestofix == "5- Zolgensma")] <- 5
agestofix[which(agestofix == "2 months- spinraza, zolgensma 9.23")] <- 2
agestofix[which(agestofix == "9.03- zolgensma, ~ 2.5 months- spinraza- unknown exact date")] <- 9.03
agestofix[which(agestofix == "spinraza ~ approx 6 months old. zolgensma-14.88")] <- 6
agestofix[which(agestofix == "3- risdipam, 7 months- zolgensma, spinraza-15 months")] <- 3
agestofix[which(agestofix == "6 wo")] <- 6
agestofix[which(agestofix == "9 months")] <- 9
agestofix[which(agestofix == "7.85 spinraza")] <- 7.85
agestofix[which(agestofix == "3.98 / 12.39")] <- 3.98
agestofix[which(agestofix == "Between 7-12 months")] <- 9.5 #took average

chartreview$treatment_age_c[toclean] <- agestofix

#look at treatment Weeks (instructions: If treatment was provided before 1 mth, indicate age in days that treatment was first provided (adjusted if premature))
chartreview[chartreview$treatment_wks != "",c("treatment_age","treatment_wks")] 

#for those with 0 or blank for treatment age, replace with days converted to months
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "21")] <- 21*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "" & chartreview$treatment_wks == "12")] <- 12*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "12")] <- 12*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "2")] <- 2*0.0328767
#coding here was pre-term 
chartreview$treatment_age_c[which(chartreview$treatment_wks == "38w 5d")] <- 0

chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "21")] <- 21*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "25 do")] <- 25*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "26 days")] <- 26*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "5 days")] <- 5*0.0328767
chartreview$treatment_age_c[which(chartreview$treatment_age == "0" & chartreview$treatment_wks == "14 ")] <- 14*0.0328767


chartreview$treatment_age_c <- as.numeric(chartreview$treatment_age_c)
 
# chartreview %>% select(studyid, treatment_age, treatment_wks, treatment_age_c) %>% arrange(treatment_age_c) %>% kable()

#chartreview %>% select(studyid, treatment_age, treatment_wks, treatment_age_c) %>% write.csv(file = "../data/SMA_treatment_age_check_20241002.csv", row.names = F)

Write out files for analysis

#remove the reliability ratings (same exam, multiple raters) and use consensus
con_analysis <- analysis1 %>% 
  filter(grepl("con", rater_clean, ignore.case=T))

nonrelib_analysis <- analysis1 %>% 
  filter(studyid_clean %in% con_analysis$studyid_clean == FALSE, 
         vfss_analysis_complete != 0)

relib_analysis <- analysis1 %>% 
  filter(studyid_clean %in% con_analysis$studyid_clean) %>% 
  filter(!grepl("con", rater_clean, ignore.case=T))

#combine to keep only the consensus ratings and the non-reliability ones (single rated)
analysis2 <- bind_rows(con_analysis, nonrelib_analysis)


#Looks like there are some duplicate exam codes for some participants
dups <- analysis2$`Subject ID`[which(duplicated(paste0(analysis2$`Subject ID`, analysis2$Exam)))]

examdupstocheck <- analysis2 %>% 
  filter(`Subject ID` %in% dups) %>% 
  select(`Subject ID`, Exam, ID, studyid, studyid_clean, rater_clean) %>% 
  arrange(`Subject ID`, Exam)

examdupstocheck %>% 
  kable()

Remove duplicate and extra-rated cases (from email 2023-10-24)

studyidstoremove <- c("235 HM", "KLM 154", "227 HM", "KLM 180", "198 HM", "KLM 226", "KLM 37", "KLM 65", "30", "017")

analysis3 <- analysis2 %>% 
  filter(studyid %in% studyidstoremove == FALSE)

#recheck
summary(duplicated(paste0(analysis3$`Subject ID`, analysis3$Exam)))

analysis3 %>% 
  filter(`Subject ID` %in% dups) %>% 
  select(`Subject ID`, Exam, ID, studyid, studyid_clean, rater_clean) %>% 
  arrange(`Subject ID`, Exam) %>% 
  kable()
#add long chart review data to the analysis2 data
analysis4 <- analysis3 %>% 
  mutate(visit = as.numeric(gsub("Exam|_Analysis_Code","", Exam))) %>% 
  left_join(y=chartreview_long, by=c("Subject ID" = "studyid", "visit"), relationship = "one-to-one", suffix = c("a", "cr")) %>% 
  #remove case that was entered in error
  filter(comments_1 != "entered in error disregard.")
# write.csv(analysis4, file=paste0("../data/SMA_clean_analysis_data_", gsub("-", "", Sys.Date()),".csv"), row.names=F)
# write.csv(chartreview, file=paste0("../data/SMA_clean_chartReview_data_", gsub("-", "", Sys.Date()),".csv"), row.names=F)
# write.csv(relib_analysis, file = paste0("../data/SMA_clean_reliability_data_", gsub("-", "", Sys.Date()),".csv"), row.names=F)

Session information

sessionInfo()