This script takes in both the long and wide versions of the raw kinematics data.
# Add files to the correct section with the "nl_" or "nw_" prefixes in the object name.
### Original data versions (org_) #####
# All raw kinematics exports live under one Box folder; define the paths once
# instead of repeating the full prefix on every read. Filenames are kept
# byte-for-byte, including embedded spaces and "(1)" suffixes.
kin_dir <- "~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/1022295_McGrattan_SMA_KINEMATICS"
raw_dir <- file.path(kin_dir, "SMA_Data/raw_data")
#imports the data with header
orig_SMA_DAT <- read.csv(file = file.path(raw_dir, "SMA_N30_Single_v_All_Analysis_9_01_22_varnameupdate.csv"), header = TRUE)
#we'll only be taking participants 6 and 7 from the following data set
orig_SMA_DAT_suppl <- read.csv(file = file.path(raw_dir, "BATCH1&2_SMA_EXPORT_20221206_varnameupdate.csv"), header = TRUE)
### Newer long version of data (non-updated program; nl_) ####
nl_SMA_DAT_b2 <- read.csv(file.path(raw_dir, "Cleaned_Spoden_Batch2_AugSept.csv"))
# NOTE: this filename really does contain a space before ".csv"
nl_SMA_DAT_b3 <- read.csv(file.path(raw_dir, "Cleaned_OctNov_Batch_Spoden .csv"))
#this one had "complete" for first and last name - replace with the MRN
nl_SMA_DAT_b3$firstname <- nl_SMA_DAT_b3$medicalrecordnumber
nl_SMA_DAT_b3$lastname <- nl_SMA_DAT_b3$medicalrecordnumber
nl_SMA_DAT_b4 <- read.csv(file.path(raw_dir, "Cleaned_Spoden_June_Batch_SMA.csv"))
nl_SMA_DAT_b5 <- read.csv(file.path(raw_dir, "Cleaned_Klein_1.csv"))
nl_SMA_DAT_b6 <- read.csv(file.path(raw_dir, "Cleaned Batch 3 Swallowtail Ratings Dembroski.csv"))
nl_SMA_DAT_b7 <- read.csv(file.path(raw_dir, "Cleaned Batch 2 Swallowtail Ratings Dembroski.csv"))
nl_SMA_DAT_b8 <- read.csv(file.path(raw_dir, "Dembroski_1_cleaned.csv"))
#Add new items here, with nl_dataobjectname <- read.csv(file.path(raw_dir, "FILENAME.csv"))
nl_SMA_DAT_b9 <- read.csv(file.path(raw_dir, "Cleaned_Batch 4 Swallowtail Ratings Dembroski.csv"))
nl_SMA_DAT_b10 <- read.csv(file.path(raw_dir, "spodenbatch5_cleaned.csv"))
nl_SMA_DATb11 <- read.csv(file.path(raw_dir, "Cleaned_BATCH1&2_SMA_EXPORT_Spoden.csv"))
# NOTE(review): b12 re-reads the same file as nl_SMA_DAT_b2; exact duplicate
# rows are dropped later in the script -- TODO confirm the re-read is intended
nl_SMA_DATb12 <- read.csv(file.path(raw_dir, "Cleaned_Spoden_Batch2_AugSept.csv"))
nl_SMA_DATb13 <- read.csv(file.path(raw_dir, "SPODEN_SMA_251_233_REDO.csv"))
### Newer wide version of data (updated program; nw_) ####
nw_SMA_DAT_zb1 <- read.csv(file.path(raw_dir, "Cleaned_Zoeller_1.csv"))
nw_SMA_DAT_zb2 <- read.csv(file.path(raw_dir, "Cleaned_Turski_Batch_2.csv"))
nw_SMA_DAT_zb3 <- read.csv(file.path(raw_dir, "Cleaned_Turski_Batch_3.csv"))
nw_SMA_DAT_zb4 <- read.csv(file.path(raw_dir, "Cleaned_Turski_Batch_4.csv"))
nw_SMA_DAT_zb5 <- read.csv(file.path(raw_dir, "Cleaned_Klein_2.csv"))
nw_SMA_DAT_zb6 <- read.csv(file.path(raw_dir, "Cleaned_BATCH_1_Export 9-4-23 Ochura.csv"))
nw_SMA_DAT_zb7 <- read.csv(file.path(raw_dir, "Cleaned_Turski_Batch_5.csv"))
nw_SMA_DAT_zb8 <- read.csv(file.path(raw_dir, "CleanedBatch_4_Nov Ochura J.csv"))
nw_SMA_DAT_zb9 <- read.csv(file.path(raw_dir, "CleanedOchuraBATCH_2.csv"))
#removed the last few blank lines from this one
nw_SMA_DAT_zb10 <- read.csv(file.path(raw_dir, "CleanedOchuraBatch 3.csv"))
#Add new items here, with nw_dataobjectname <- read.csv(file.path(raw_dir, "FILENAME.csv"))
nw_SMA_DAT_zb11 <- read.csv(file.path(raw_dir, "Cleaned_IW_SMA_RELIABILITY2.csv"))
nw_SMA_DAT_zb12 <- read.csv(file.path(raw_dir, "Cleaned_IWilson-Batch1.csv"))
nw_SMA_DAT_zb13 <- read.csv(file.path(raw_dir, "Cleaned_McGhee_Batch1.csv"))
nw_SMA_DAT_zb14 <- read.csv(file.path(raw_dir, "Cleaned_Turski_7.csv"))
nw_SMA_DAT_zb15 <- read.csv(file.path(raw_dir, "Cleaned_Turski_Batch_6.csv"))
nw_SMA_DAT_zb16 <- read.csv(file.path(raw_dir, "McGhee_Batch2_Cleaned.csv"))
nw_SMA_DAT_zb17 <- read.csv(file.path(raw_dir, "Wilson_Batch3_Cleaned.csv"))
nw_SMA_DAT_zb18 <- read.csv(file.path(raw_dir, "WilsonCleaned_SMA_batch_2.csv"))
nw_SMA_DAT_zb19 <- read.csv(file.path(raw_dir, "IW_SMABatch4 (1) (1).csv"))
nw_SMA_DAT_zb20 <- read.csv(file.path(raw_dir, "IW_BATCH1ReDo_cleaned (1).csv"))
nw_SMA_DAT_zb21 <- read.csv(file.path(raw_dir, "AS_SMA_29.csv"))
nw_SMA_DAT_zb22 <- read.csv(file.path(raw_dir, "IW_SMA315_320.csv"))
nw_SMA_DAT_zb23 <- read.csv(file.path(raw_dir, "HM_July2025.csv"))
nw_SMA_DAT_zb24 <- read.csv(file.path(raw_dir, "ChanSMA_325_331.csv"))
nw_SMA_DAT_zb25 <- read.csv(file.path(raw_dir, "spodenSMA_324_331_323_322_321.csv"))
nw_SMA_DAT_zb26 <- read.csv(file.path(raw_dir, "spodenSMAextra_Cleaned.csv"))
#read in file name/object pairing (above)
filenamekey <- read.csv(file.path(kin_dir, "SMA_kinematics_filenames.csv"))
### SMA chart data #####
# Older snapshot CSV reads, kept for reference; superseded by the .Rdata
# bundle loaded below.
# chartdata <- read.csv(file="~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/SMA_clean_chartReview_data_20250623.csv")
# analysisdata <- read.csv(file="~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/SMA_clean_analysis_data_20250623.csv")
# fois_sec <- read.csv(file="~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA_BabyVFSSImP/BABYVFSSIMP/data/SMA_fois_sec_combined.csv")
# Loads the BabyVFSSImP analysis objects; analysis_included and
# analysis_included_lastexam (used throughout below) are presumed to come
# from this file -- TODO confirm its full contents.
load("~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/SMA_analysis_files.Rdata")
#log to connect subject id to exam #
idlog <- read.csv("~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/577565_McGrattan_SMA/BABYVFSSIMP/data/Clean_ID_log.csv")
Long data -> original format
Wide data -> original format
### nw --> orig ####
# Convert every new wide-format (nw_) dataset to the long layout of the
# original exports so all datasets can be row-bound together below.
# Anchor the pattern so only objects whose names START with "nw_" are picked
# up (an unanchored "nw_" could match unrelated objects).
nwtoconvert <- ls()[grepl("^nw_", ls())]
# Lookup of Measurement_name -> graph type built from one long-format
# dataset; loop-invariant, so it is computed once outside the loop.
graph_ref <- nl_SMA_DAT_b2 %>%
  group_by(graph, Measurement_name) %>%
  tally()
for (nw in nwtoconvert) {
  temp <- get(nw)
  # reshape the wide measurement columns (B1 through BCR) into long format
  temp <- temp %>%
    mutate(across(.cols = B1:BCR, .fns = as.character)) %>%
    pivot_longer(cols = B1:BCR,
                 names_to = "Measurement_name",
                 values_to = "value") %>%
    # split the unit suffix out of the value string ("s" checked before "cm")
    mutate(unit = case_when(grepl("s", value) ~ "s",
                            grepl("cm", value) ~ "cm",
                            TRUE ~ ""),
           value = gsub("s|cm", "", value))
  # harmonize measurement names with the long-format datasets
  temp$Measurement_name[which(temp$Measurement_name == "Airwaycl")] <- "AIRWAYcl"
  temp$Measurement_name[which(temp$Measurement_name == "BP1.Aecl")] <- "BP1AEcl"
  temp$Measurement_name[which(temp$Measurement_name == "PCL")] <- "Pcl"
  # PESm arrives as separate seconds/displacement columns: set their units
  # explicitly, then collapse both to the single name "PESm"
  temp$unit[which(temp$Measurement_name == "PESm.sec.")] <- "s"
  temp$unit[which(temp$Measurement_name == "PESm.disp.")] <- "cm"
  temp$Measurement_name[grepl("PESm", temp$Measurement_name)] <- "PESm"
  # attach the graph type by matching measurement names against the lookup
  temp$graph <- graph_ref$graph[match(temp$Measurement_name, graph_ref$Measurement_name)]
  # keep only the columns present in the original layout
  temp <- subset(temp, select = c("MRN", "Assessor", "Assessment.Date", "Pt..Name", "DOB", "name", "graph", "Measurement_name", "value", "unit"))
  # blank out date/identifying fields (analysis uses best/worst swallow only)
  temp$Assessment.Date <- NA
  temp$Pt..Name <- NA
  temp$DOB <- NA
  # norm/sd columns exist in the original data but not in the wide exports
  temp$norm <- NA
  temp$sd <- NA
  # positional rename: assumes orig_SMA_DAT has the same number and order of
  # columns as the 12 built above -- TODO confirm if its layout ever changes
  names(temp) <- names(orig_SMA_DAT)
  # overwrite the original wide object with the converted long version
  assign(nw, temp)
}
#get current date to use in file names when saving data in lines below
curdat <- gsub('-', '', Sys.Date()) # e.g. "20250623"; gsub drops the dashes
## pull participants 6 & 7 from SMA_DAT_suppl
orig_SMA_DAT_suppl67only <- orig_SMA_DAT_suppl[which(orig_SMA_DAT_suppl$Participant_Num %in% c(6, 7)), ]
## check
#View(orig_SMA_DAT_suppl67only)
## get all datasets (only use newer data for now)
# Anchored pattern: collect every object created above whose name starts
# with nl_ or nw_ (the nw_ ones were already converted to long format).
SMA_DAT_list <- mget(grep("^nl_|^nw_", ls(), value = TRUE))
#combine into one
SMA_DAT <- do.call("rbind", SMA_DAT_list)
#add variable with dataset
# rbind row names look like "nl_SMA_DAT_b2.15"; strip everything from the
# first dot onward. NOTE: the replacement is " " (a space), so datafile
# values end with a trailing space -- later matching (e.g. the literal
# "nl_SMA_DAT_b10 ") depends on this, so do not "fix" it here.
SMA_DAT$datafile <- gsub("\\..*", " ", row.names(SMA_DAT))
#add in variable that denotes dataset type ("nl" or "nw": the prefix before
#the first underscore of the row name)
SMA_DAT$DataSetType <- unlist(lapply(strsplit(row.names(SMA_DAT), "_"), function(x) x[1]))
#check if there are duplicates in the data (rows exactly duplicated)
summary(duplicated(SMA_DAT))
SMA_DAT$Isdup <- duplicated(SMA_DAT)
#remove exact duplicates (caused by reading in the data multiple times)
SMA_DAT1 <- filter(SMA_DAT, Isdup == FALSE)
#create table of types by graph to make sure all the data are right
# One row per (DataSetType, graph, Measurement_name, units) combination,
# spread wide so the graph label can be compared across dataset types; the
# count column n is computed only to collapse duplicates, then dropped.
SMA_measurementtypes <- SMA_DAT1 %>%
group_by(DataSetType, graph, Measurement_name, units) %>%
tally() %>%
select(-n) %>%
pivot_wider(names_from = DataSetType,
values_from = graph)
#review the raw participant identifiers before cleaning
unique(SMA_DAT1$Participant_Num)
# Spelled-out participant numbers -> digits via a single lookup table
# (replaces a long chain of one-off which()/== recodes; values are strings
# because Participant_Num is a character column).
spelled_recode <- c(
  "eight" = "8",
  "four" = "4",
  "fourteen" = "14",
  "thirty-one" = "31",
  "Two Eighteen" = "218",
  "Two Twenty Seven" = "227",
  "One Sixty Six" = "166",
  "One Fifty Nine" = "159",
  "Two Hundred Eight" = "208",
  "Two Nineteen" = "219",
  "Two Forty Nine" = "249",
  "One Fifty Seven" = "157",
  "One Fifty Two" = "152",
  "One Sixty Two" = "162",
  "Three thirty two" = "332",
  "Three Thirty Three" = "333",
  "Three Thirty Four" = "334",
  "Three Thirty Five" = "335",
  "Three Thirty Six" = "336",
  "Three Forty" = "340",
  "Three Thirty Nine" = "339",
  "Three Thirty Eight" = "338",
  "Three Twenty Six" = "326",
  "Three Forty Two" = "342"
)
is_spelled <- SMA_DAT1$Participant_Num %in% names(spelled_recode)
SMA_DAT1$Participant_Num[is_spelled] <- spelled_recode[SMA_DAT1$Participant_Num[is_spelled]]
# Pattern-based fixes. ORDER MATTERS here: "1a|1b" must be mapped to 1
# before the generic "a" stripper runs, and the "p" stripper runs before
# the "03" check.
SMA_DAT1$Participant_Num[grepl("121", SMA_DAT1$Participant_Num)] <- 121
# entries like "Swallow 12" -> "12"
SMA_DAT1$Participant_Num[grepl("Swallow", SMA_DAT1$Participant_Num)] <- gsub("Swallow ", "", SMA_DAT1$Participant_Num[grepl("Swallow", SMA_DAT1$Participant_Num)])
# entries like "p12" -> "12"
SMA_DAT1$Participant_Num[grepl("p", SMA_DAT1$Participant_Num)] <- gsub("p", "", SMA_DAT1$Participant_Num[grepl("p", SMA_DAT1$Participant_Num)])
SMA_DAT1$Participant_Num[which(SMA_DAT1$Participant_Num == "03")] <- 3
# visit suffixes: "1a"/"1b" are participant 1; other "a" suffixes stripped
SMA_DAT1$Participant_Num[grepl("1a|1b", SMA_DAT1$Participant_Num)] <- 1
SMA_DAT1$Participant_Num[grepl("a", SMA_DAT1$Participant_Num)] <- gsub("a", "", SMA_DAT1$Participant_Num[grepl("a", SMA_DAT1$Participant_Num)])
# entries mentioning a video belong to participant 325 -- TODO confirm
SMA_DAT1$Participant_Num[grepl("video", SMA_DAT1$Participant_Num)] <- 325
# one case with a missing number but a Participant value of 300
SMA_DAT1$Participant_Num[which(is.na(SMA_DAT1$Participant_Num) & SMA_DAT1$Participant == 300)] <- 300
#re-check the identifiers after cleaning
unique(SMA_DAT1$Participant_Num)
#which has a name?
#use Participant instead
SMA_DAT1 %>%
  filter(grepl("Wilson", Participant_Num)) %>%
  group_by(Participant) %>%
  slice_head(n=1)
#which is "no swallows"
SMA_DAT1 %>%
  filter(Participant_Num == "no swallows") %>%
  group_by(Participant) %>%
  slice_head(n=1) #should be removed
#which is "no swllows" (typo variant; fixable from Participant)
SMA_DAT1 %>%
  filter(Participant_Num == "no swllows") %>%
  group_by(Participant) %>%
  slice_head(n=1)
#which is "Comlete"
#SMA_DAT1[which(SMA_DAT1$Participant_Num == "Comlete"),] #in participant
SMA_DAT1 %>%
  filter(Participant_Num == "Comlete") %>%
  group_by(Participant) %>%
  slice_head(n=1)
#what is the "" row?
SMA_DAT1[which(SMA_DAT1$Participant_Num == ""),] #substitute participant
# For each junk identifier above, the Participant column holds the real
# number, so substitute it in a single pass. ("no swallows" is intentionally
# NOT fixed here; those rows are dropped further down.)
junk_ids <- c("Irena Wilson", "Iren Wilson", "no swllows", "Comlete", "")
is_junk <- SMA_DAT1$Participant_Num %in% junk_ids
SMA_DAT1$Participant_Num[is_junk] <- SMA_DAT1$Participant[is_junk]
#what is the NA row?
SMA_DAT1[which(is.na(SMA_DAT1$Participant_Num)),] #blank case, will remove
unique(SMA_DAT1$Participant_Num)
Check if any values in "Participant" do not match "Participant_Num".
#check to see if Participant and Participant_Num don't match
# NA in either column makes ifelse() return NA, which the match == 0 filter
# drops, so only definite mismatches are listed here.
SMA_DAT1 %>%
select(Participant, Participant_Num, datafile) %>%
mutate(match = ifelse(Participant == Participant_Num, 1, 0)) %>%
filter(match == 0) %>%
group_by(Participant, Participant_Num) %>%
slice_head(n=1)
#Participant 107 should be 75 -- substitute the Participant column value
SMA_DAT1$Participant_Num[which(SMA_DAT1$Participant_Num=="107")] <- SMA_DAT1$Participant[which(SMA_DAT1$Participant_Num=="107")]
# Drop rows that cannot be tied to a participant: missing or blank numbers,
# and exams rated as having no swallows.
SMA_DAT1 <- filter(SMA_DAT1,
!is.na(Participant_Num),
Participant_Num != "no swallows",
Participant_Num != "")
# (spot-check of specific participants, kept for reference)
# SMA_DAT1 %>%
# filter(Participant_Num %in% c(16, 23, 11)) %>%
# group_by(Participant_Num) %>%
# select(Participant_Num, Participant, datafile) %>%
# slice_head(n=1)
Check participant by file to make sure the correct data are included. Need to update data to keep newest for 288.
# Summary of participants per file, joined to the filename key so each data
# object can be traced back to its source CSV.
part_by_file <- SMA_DAT1 %>%
  group_by(datafile, Participant_Num) %>%
  tally() %>%
  arrange(Participant_Num) %>%
  # NOTE(review): datafile values end in a trailing space; assumes
  # filenamekey$object uses the same format -- TODO confirm
  full_join(filenamekey, by = c("datafile" = "object")) %>%
  mutate(Included = ifelse(Participant_Num %in% analysis_included_lastexam$studyid_clean, 1, 0))
#write.csv(part_by_file, file="Summary_of_Kinematic_files.csv", row.names = F)
#reliability? participants appearing in more than one file
dupids <- part_by_file$Participant_Num[which(duplicated(part_by_file$Participant_Num))]
#remove earlier data from participant 288
# The datafile value carries a trailing space (see the gsub that builds it).
# Guarding on length() avoids the x[-integer(0), ] footgun: with no matching
# rows, the unguarded negative index would silently drop EVERY row.
drop288 <- which(SMA_DAT1$Participant_Num == 288 & SMA_DAT1$datafile == "nl_SMA_DAT_b10 ")
if (length(drop288) > 0) {
  SMA_DAT1 <- SMA_DAT1[-drop288, ]
}
Check matches using participant number
#check studyID matches
summary(SMA_DAT1$Participant_Num %in% analysis_included$studyid_clean)
# They do not all match...
# kinematics participants with no record in the analysis data
(nomatch <- unique(SMA_DAT1$Participant_Num[!(SMA_DAT1$Participant_Num %in% analysis_included$studyid_clean)]))
#which files are they from?
nomatch_dat <- SMA_DAT1[which(SMA_DAT1$Participant_Num %in% nomatch), ]
nomatch_dat %>%
  group_by(datafile, Participant_Num) %>%
  tally()
#only 223 should be in there, rest are duplicates.
#other way: analysis participants with no kinematics data
summary(analysis_included$studyid_clean %in% SMA_DAT1$Participant_Num)
# Sorted list of study ids missing from the kinematics data; na.last = TRUE
# mirrors order()'s default NA placement in the original one-liner.
sort(unique(analysis_included$studyid_clean[!(analysis_included$studyid_clean %in% SMA_DAT1$Participant_Num)]), na.last = TRUE)
#236 - this participant had no swallows
Add subject id to exams using BabyVFSSimP id log (cleaned and written out in the BabyVFSSimP data cleaning script)
#add subject id to exams
# How many kinematics rows have a participant number present in the ID log?
summary(SMA_DAT1$Participant_Num %in% idlog$studyid_clean)
# match() takes the first ID-log hit per participant number; unmatched rows
# get an NA Subject.ID
SMA_DAT1$Subject.ID <- idlog$Subject.ID[match(SMA_DAT1$Participant_Num, idlog$studyid_clean)]
Check treated included
# Flag analysis participants whose Subject.ID appears in the kinematics data
analysis_included$In_Kinematics <- ifelse(analysis_included$Subject.ID %in% SMA_DAT1$Subject.ID, 1, 0)
# List any subject with no kinematics rows (outer parens print the result)
(t_notinclude <- analysis_included %>%
group_by(Subject.ID) %>%
summarize(Included = sum(In_Kinematics)) %>%
#find any not included
filter(Included == 0))
Add group to the SMA data
#merge with treatment data
# Treatment-group assignment per study id, used for the join below.
treatmentinfor_tomerge <- select(analysis_included, studyid_clean, treat_group)
#merge datasets
# Inner join: kinematics rows without a treatment record are dropped.
SMA_DAT2 <- SMA_DAT1 %>%
  mutate(Participant_Num = as.numeric(Participant_Num)) %>%
  inner_join(treatmentinfor_tomerge, by = c("Participant_Num" = "studyid_clean"))
# Distinct subjects per treatment group (printed for review).
SMA_DAT2 %>%
  group_by(treat_group) %>%
  summarize(n = n_distinct(Subject.ID))
# Keep only each participant's last exam.
SMA_DAT2_LastExam <- SMA_DAT2 %>%
  filter(Participant_Num %in% analysis_included_lastexam$studyid_clean)
Partition out reliability exams and keep only consensus in analysis
# Hand-annotated copy of the kinematic file summary marking which
# participant/file pairs are reliability exams and which are consensus
# ratings ("Concensus" spelling matches the spreadsheet column name).
part_by_file_up <- read_excel(path = "~/Library/CloudStorage/Box-Box/CLA RSS Data Sharing/1022295_McGrattan_SMA_KINEMATICS/McGrattan_SMA_Scripts/Summary_of_Kinematic_files_addinfo.xlsx")
# reliability exams among last-exam participants
relib_files <- part_by_file_up %>%
filter(Reliability == 1, Participant_Num %in% analysis_included_lastexam$studyid_clean)
# consensus-rated files among last-exam participants
con_files <- part_by_file_up %>%
filter(Concensus == "x", Participant_Num %in% analysis_included_lastexam$studyid_clean)
# datafile carries a trailing space, hence trimws() before building the
# participant+file matching key
SMA_reliability <- SMA_DAT2_LastExam %>%
filter(paste(Participant_Num, trimws(datafile)) %in% paste(relib_files$Participant_Num, relib_files$datafile))
SMA_consensus <- SMA_DAT2_LastExam %>%
filter(paste(Participant_Num, trimws(datafile)) %in% paste(con_files$Participant_Num, con_files$datafile))
#remove reliability and consensus cases, then add the consensus back in.
SMA_DAT2_LastExam1 <- SMA_DAT2_LastExam %>%
filter(Participant_Num %in% unique(c(SMA_consensus$Participant_Num, SMA_reliability$Participant_Num)) == F) %>%
bind_rows(SMA_consensus)
Create data subsets with only the measures of interest. Using Last exam data for now.
#Making a sub dataset with only rows that have these variables
#writing out all participants now
# Kinematic measures of interest for the analysis.
measures_keep <- c("PCR", "OPT", "HPT", "TPT", "PESop")
SMA_DAT_sub <- SMA_DAT2_LastExam1 %>%
  filter(Measurement_name %in% measures_keep) %>%
  # non-numeric value strings become NA (with a coercion warning)
  mutate(value = as.numeric(value))
# Ensure Measurement_name is plain character (factor-like values caused
# problems in the summarise steps below)
SMA_DAT_sub$Measurement_name <- as.character(SMA_DAT_sub$Measurement_name)
#same subset for the reliability data
SMA_DAT_sub_relib <- SMA_reliability %>%
  filter(Measurement_name %in% measures_keep) %>%
  mutate(value = as.numeric(value))
SMA_DAT_sub_relib$Measurement_name <- as.character(SMA_DAT_sub_relib$Measurement_name)
Summarize data within each participant. Finding the min, max, standard deviation and outputting into a file called SMA DAT SUM. Also removing any NA’s from analysis.
This is grouped by SUBJECT ID for the last exam.
# Per-subject, per-measure summary across all retained swallows.
# Small()/Large() are DescTools helpers returning the k smallest/largest
# values; averaging them gives a "worst/best 5%" score that is less
# outlier-sensitive than the single min/max. min/max over an all-NA group
# returns +/-Inf (with a warning); those are converted to NA below.
SMA_DAT_summary <- SMA_DAT_sub %>%
group_by(Subject.ID, Measurement_name, treat_group) %>%
dplyr::summarise(Lowest_Every= min(value, na.rm = T),
Highest_Every = max(value, na.rm = T),
Mean = mean(value, na.rm = T),
STD = sd(value, na.rm = T),
n_obvs = n(),
fivep = ceiling(n_obvs*0.05),
Lowest_5p = mean(Small(value, k=fivep), na.rm=T),
Highest_5p = mean(Large(value, k=fivep), na.rm=T),
n_visit = n_distinct(Participant_Num)) %>% #get total number of observations for each participant
#create OI scores - Worst is highest of all except for PES which is lowest
mutate(Worst_OI = case_when(Measurement_name == "PESop" ~ Lowest_Every,
Measurement_name != "PESop" ~ Highest_Every),
Best_OI = case_when(Measurement_name == "PESop" ~ Highest_Every,
Measurement_name != "PESop" ~ Lowest_Every),
Worst_OI_5p = case_when(Measurement_name == "PESop" ~ Lowest_5p,
Measurement_name != "PESop" ~ Highest_5p),
Best_OI_5p = case_when(Measurement_name == "PESop" ~ Highest_5p,
Measurement_name != "PESop" ~ Lowest_5p))
## `summarise()` has grouped output by 'Subject.ID',
## 'Measurement_name'. You can override using the `.groups`
## argument.
#replace -inf and inf with NA (these arise from groups where every value
#was NA)
SMA_DAT_summary[SMA_DAT_summary == -Inf] <- NA
SMA_DAT_summary[SMA_DAT_summary == Inf] <- NA
#review table
#View(SMA_DAT_summary)
#for reliability
# Same summary as above, additionally grouped by datafile so each rater's
# file contributes its own summary row (used for reliability comparisons).
SMA_DAT_summary_relib <- SMA_DAT_sub_relib %>%
group_by(Subject.ID, datafile, Measurement_name, treat_group) %>%
dplyr::summarise(Lowest_Every= min(value, na.rm = T),
Highest_Every = max(value, na.rm = T),
Mean = mean(value, na.rm = T),
STD = sd(value, na.rm = T),
n_obvs = n(),
fivep = ceiling(n_obvs*0.05),
Lowest_5p = mean(Small(value, k=fivep), na.rm=T),
Highest_5p = mean(Large(value, k=fivep), na.rm=T),
n_visit = n_distinct(Participant_Num)) %>% #get total number of observations for each participant
#create OI scores - Worst is highest of all except for PES which is lowest
mutate(Worst_OI = case_when(Measurement_name == "PESop" ~ Lowest_Every,
Measurement_name != "PESop" ~ Highest_Every),
Best_OI = case_when(Measurement_name == "PESop" ~ Highest_Every,
Measurement_name != "PESop" ~ Lowest_Every),
Worst_OI_5p = case_when(Measurement_name == "PESop" ~ Lowest_5p,
Measurement_name != "PESop" ~ Highest_5p),
Best_OI_5p = case_when(Measurement_name == "PESop" ~ Highest_5p,
Measurement_name != "PESop" ~ Lowest_5p))
## `summarise()` has grouped output by 'Subject.ID', 'datafile',
## 'Measurement_name'. You can override using the `.groups`
## argument.
#replace -inf and inf with NA (all-NA groups)
SMA_DAT_summary_relib[SMA_DAT_summary_relib == -Inf] <- NA
SMA_DAT_summary_relib[SMA_DAT_summary_relib == Inf] <- NA
Review dimensions
#review dimensions; each participant should only have 5 measurements; LOOKS GOOD
# Cross-tab of subject by measure: after the summarise above there is one
# row per subject per measure, so every cell should be 1
table(SMA_DAT_summary$Subject.ID, SMA_DAT_summary$Measurement_name)
Export cleaned data to use for analysis.
# Exports are disabled by default; uncomment to write dated CSVs (file
# names use curdat, set earlier in the script).
# #export GENERAL SUMMARY to csv (file showing min, max, std, n_observations for each participant every swallow analysis)
# write.csv(SMA_DAT_summary, paste0('../SMA_Data/SMA_DAT_summary_last',curdat,'.csv'), row.names=FALSE)
#
# #write out the data file
# write.csv(SMA_DAT_sub, paste0('../SMA_Data/SMA_DAT_combined_last',curdat,'.csv'), row.names = FALSE)
#
# #write out the reliability file
# write.csv(SMA_DAT_summary_relib, paste0('../SMA_Data/SMA_DAT_reliab_last',curdat,'.csv'), row.names = FALSE)
# Record R and package versions for reproducibility.
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] DescTools_0.99.60 knitr_1.50 vcd_1.4-13 readxl_1.4.5
## [5] tidyr_1.3.1 ggplot2_3.5.2 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 utf8_1.2.6 generics_0.1.4 class_7.3-23
## [5] lattice_0.22-7 hms_1.1.3 digest_0.6.37 magrittr_2.0.3
## [9] evaluate_1.0.4 RColorBrewer_1.1-3 mvtnorm_1.3-3 fastmap_1.2.0
## [13] Matrix_1.7-3 cellranger_1.1.0 jsonlite_2.0.0 e1071_1.7-16
## [17] httr_1.4.7 purrr_1.0.4 scales_1.4.0 jquerylib_0.1.4
## [21] cli_3.6.5 expm_1.0-0 rlang_1.1.6 withr_3.0.2
## [25] cachem_1.1.0 yaml_2.3.10 rootSolve_1.8.2.4 tools_4.5.1
## [29] tzdb_0.5.0 lmom_3.2 gld_2.6.7 Exact_3.3
## [33] colorspace_2.1-1 forcats_1.0.0 pacman_0.5.1 boot_1.3-31
## [37] vctrs_0.6.5 R6_2.6.1 zoo_1.8-14 proxy_0.4-27
## [41] lifecycle_1.0.4 fs_1.6.6 MASS_7.3-65 pkgconfig_2.0.3
## [45] pillar_1.10.2 bslib_0.9.0 gtable_0.3.6 Rcpp_1.0.14
## [49] glue_1.8.0 data.table_1.17.6 haven_2.5.5 xfun_0.52
## [53] tibble_3.3.0 lmtest_0.9-40 tidyselect_1.2.1 rstudioapi_0.17.1
## [57] farver_2.1.2 htmltools_0.5.8.1 rmarkdown_2.29 readr_2.1.5
## [61] compiler_4.5.1