I'm trying to fill in an online form and scrape the results.
Using RSelenium, I'm able to fill in the data for one row:
library(RSelenium)
library(xml2)
library(tidyverse)
library(rvest)
# Start Selenium Server --------------------------------------------------------
# https://docs.ropensci.org/RSelenium/articles/basics.html#connecting-to-a-selenium-server-1
# https://www.akipredictor.com/en/aki_predictor/
# Launch a Selenium server with a Firefox session on port 4545. `rD` is the
# object returned by rsDriver(); `remDr` is its "client" element, which is the
# handle used to drive the browser in the rest of the script.
# `FALSE` is written out because `F` is an ordinary variable that can be
# reassigned.
rD <- rsDriver(browser = "firefox", port = 4545L, verbose = FALSE)
remDr <- rD[["client"]]
# form ------------------------------------------------------------------
# Open the AKI predictor page and click the legal-terms control.
remDr$navigate("https://www.akipredictor.com/en/aki_predictor/")
remDr$findElement(using = "name", value = "agree_to_legal_terms")$clickElement()

# Pre-admission information
# Each control is looked up by its `name` attribute and the value is typed into
# it right away, so no element handle is kept around.
remDr$findElement(using = "name", value = "age")$
  sendKeysToElement(list("70"))
remDr$findElement(using = "name", value = "baseline_screat")$
  sendKeysToElement(list("1"))
remDr$findElement(using = "name", value = "is_diabetic")$
  sendKeysToElement(list("Yes"))
remDr$findElement(using = "name", value = "is_elective_admitted")$
  sendKeysToElement(list("Unplanned admission"))
remDr$findElement(using = "name", value = "type_of_surgery")$
  sendKeysToElement(list("Transplant surgery"))

# ICU admission information
# `show_admission` is clicked before the fields of this section are filled.
remDr$findElement(using = "name", value = "show_admission")$clickElement()
remDr$findElement(using = "name", value = "blood_glucose")$
  sendKeysToElement(list("200"))
remDr$findElement(using = "name", value = "has_suspected_sepsis")$
  sendKeysToElement(list("Yes"))
remDr$findElement(using = "name", value = "hd_support")$
  sendKeysToElement(list("Pharmacological"))

# Day 1 information
# `show_day1` is clicked before the fields of this section are filled.
remDr$findElement(using = "name", value = "show_day1")$clickElement()
remDr$findElement(using = "name", value = "creatinine_d1")$
  sendKeysToElement(list("1.2"))
remDr$findElement(using = "name", value = "apacheII_d1")$
  sendKeysToElement(list("30"))
remDr$findElement(using = "name", value = "max_lactate_d1")$
  sendKeysToElement(list("10"))
remDr$findElement(using = "name", value = "bilirubin_d1")$
  sendKeysToElement(list("2"))
remDr$findElement(using = "name", value = "hours_of_icu_stay")$
  sendKeysToElement(list("24"))

# Submit the form through the `predict_day1_dev` control.
remDr$findElement(using = "name", value = "predict_day1_dev")$clickElement()
# extract HTML -----------------
# Fixed 5-second pause to give the page time to fully load.
Sys.sleep(5)
html <- remDr$getPageSource()[[1]]
# Parse the page source, collect every <div>, and keep the text of the 12th one.
div_nodes <- html_nodes(read_html(html), "div")
results <- html_text(div_nodes[12][[1]])
# Strip, in this order: line breaks (the first pattern is a literal line break),
# spaces, the result heading, and the long block of explanatory text.
results <- str_replace_all(results, "
", "")
results <- str_replace_all(results, " ", "")
results <- str_replace_all(results, "RiskofdevelopingAKIduringthefirstweekofICUstay", "")
results <- str_replace_all(results, "AdvancedoptionsChoosetheclassificationthresholdClickonthequestionmarkforadditionalexplanationSincethepredictedriskisabovethechosenclassificationthreshold,thepatientisclassifiedasdevelopingAKIwithinthefirstweekofICUstay.DependingontheintendeduseoftheAKIpredictor,theusermaychoosetoadapttheclassificationthreshold,andevaluatetheeffectonthestatisticsbelow.Thedefaultclassificationthresholdof14.5maximizedbothsensitivityandspecificityinthestudieddatabase.Sensitivity:63.8%Specificity:81.9%PPV:38.0%NPV:92.8%ΔNetbenefitNone:6.8%ΔNetbenefitAll:6.4%Atthechosenclassificationthreshold,AKIpredictorcorrectlyidentifies63.8%ofthepatientswhodevelopedAKIinthestudieddatabaseAtthechosenclassificationthreshold,AKIpredictorcorrectlyidentifies81.9%ofthepatientswhodidnotdevelopAKIinthestudieddatabaseInthestudieddatabase,38.0%ofthepatientswhodevelopedAKIhadapredictedriskabovethechosenclassificationthresholdInthestudieddatabase,92.8%ofthepatientswhodidnotdevelopAKIhadapredictedriskbelowthechosenclassificationthresholdAtthechosenclassificationthreshold,AKIpredictorincreasesthepercentageofcorrectlyidentifiedAKIby6.8%inthestudieddatabase,withoutincreasingfalseclassifications,ascomparedtoconsideringnopatientwilldevelopAKI.OnlyuseaclassificationthresholdthatresultsinaΔNetbenefitNone>0Atthechosenclassificationthreshold,AKIpredictordecreasesthepercentageofmisclassifiedAKIby6.4%inthestudieddatabase,whilekeepingthesamenumberofcorrectclassifications,ascomparedtoconsideringallpatientswilldevelopAKIOnlyuseaclassificationthresholdthatresultsinaΔNetbenefitAll>0Clickonthestatisticsfordetails", "")
results
# Close the browser session, then stop the Selenium server that rsDriver()
# started; closing the client alone leaves the server running on its port.
remDr$close()
rD[["server"]]$stop()
I need to do the same process using data from a data frame. I have tried the following code:
# Start a new Selenium server with a Firefox session on port 4560 and open the
# AKI predictor page; `remDr` is the client that scrape.AKIpredictor() uses.
# `FALSE` is written out because `F` is an ordinary variable that can be
# reassigned.
rD <- rsDriver(browser = "firefox", port = 4560L, verbose = FALSE)
remDr <- rD[["client"]]
remDr$navigate("https://www.akipredictor.com/en/aki_predictor/")
#' Fill in the AKI predictor form for one patient and scrape the result text
#'
#' Drives the RSelenium client stored in the global variable `remDr`, which
#' must already have the AKI predictor page open. Each argument is typed into
#' the form control whose `name` attribute is listed below, the
#' `predict_day1_dev` control is clicked, and after a fixed 5-second pause the
#' text of the 12th `<div>` of the page is extracted and cleaned. The
#' `empty_form` control is clicked last (presumably this resets the form for
#' the next call; confirm on the page).
#'
#' Every argument must be a single non-missing value. Character, factor and
#' numeric values are accepted; they are converted to text before being sent,
#' because `sendKeysToElement()` is documented as taking plain text and values
#' read from a data frame are often numeric or factor.
#'
#' @param age Typed into `age`.
#' @param baselineSCreat Typed into `baseline_screat`.
#' @param IsDiabetic Typed into `is_diabetic` (e.g. "Yes").
#' @param IsElectiveAdmited Typed into `is_elective_admitted`
#'   (e.g. "Unplanned admission").
#' @param TypeOfSurgery Typed into `type_of_surgery`
#'   (e.g. "Transplant surgery").
#' @param Glucose Typed into `blood_glucose`.
#' @param SuspectedSepsis Typed into `has_suspected_sepsis` (e.g. "Yes").
#' @param HDSupport Typed into `hd_support` (e.g. "Pharmacological").
#' @param CreatinineD1 Typed into `creatinine_d1`.
#' @param ApacheIID1 Typed into `apacheII_d1`.
#' @param MaxLactateD1 Typed into `max_lactate_d1`.
#' @param BilirrubinD1 Typed into `bilirubin_d1`.
#' @param HoursOfICUStay Typed into `hours_of_icu_stay`.
#' @return A character string: the text of the 12th `<div>` with line breaks,
#'   spaces, the result heading and the explanatory block removed.
scrape.AKIpredictor <- function(age, baselineSCreat, IsDiabetic, IsElectiveAdmited , TypeOfSurgery,
                                Glucose, SuspectedSepsis, HDSupport,
                                CreatinineD1, ApacheIID1, MaxLactateD1, BilirrubinD1, HoursOfICUStay) {
  # Turn one scalar argument into the text that is typed into a field.
  # Numbers are formatted without scientific notation so that e.g. 100000 is
  # typed as "100000" rather than "1e+05"; factors become their labels.
  as_keys <- function(value) {
    stopifnot(length(value) == 1L, !is.na(value))
    if (is.numeric(value)) {
      format(value, scientific = FALSE, trim = TRUE, digits = 15)
    } else {
      as.character(value)
    }
  }

  # Find the control with the given `name` attribute and type `value` into it.
  fill_field <- function(name, value) {
    field <- remDr$findElement(using = "name", value = name)
    field$sendKeysToElement(list(as_keys(value)))
  }

  # Find the control with the given `name` attribute and click it.
  click_named <- function(name) {
    remDr$findElement(using = "name", value = name)$clickElement()
  }

  # NOTE(review): this control is clicked on every call. If it is a checkbox
  # that `empty_form` does not reset, a second call would untick it; verify.
  click_named("agree_to_legal_terms")

  # Pre-admission information
  fill_field("age", age)
  fill_field("baseline_screat", baselineSCreat)
  fill_field("is_diabetic", IsDiabetic)
  fill_field("is_elective_admitted", IsElectiveAdmited)
  fill_field("type_of_surgery", TypeOfSurgery)

  # ICU admission information (`show_admission` is clicked before filling)
  click_named("show_admission")
  fill_field("blood_glucose", Glucose)
  fill_field("has_suspected_sepsis", SuspectedSepsis)
  fill_field("hd_support", HDSupport)

  # Day 1 information (`show_day1` is clicked before filling)
  click_named("show_day1")
  fill_field("creatinine_d1", CreatinineD1)
  fill_field("apacheII_d1", ApacheIID1)
  fill_field("max_lactate_d1", MaxLactateD1)
  fill_field("bilirubin_d1", BilirrubinD1)
  fill_field("hours_of_icu_stay", HoursOfICUStay)

  # Submit the form.
  click_named("predict_day1_dev")

  Sys.sleep(5) # give the page time to fully load
  html <- remDr$getPageSource()[[1]]

  # Parse the page source and take the text of the 12th <div>.
  results <- read_html(html) %>%
    html_nodes("div") %>%
    .[12] %>%
    .[[1]] %>%
    html_text()

  # Strip, in this order: line breaks (the first pattern is a literal line
  # break), spaces, the result heading, and the block of explanatory text.
  results <- results %>%
    str_replace_all("
", "") %>%
    str_replace_all(" ", "") %>%
    str_replace_all("RiskofdevelopingAKIduringthefirstweekofICUstay", "") %>%
    str_replace_all("AdvancedoptionsChoosetheclassificationthresholdClickonthequestionmarkforadditionalexplanationSincethepredictedriskisabovethechosenclassificationthreshold,thepatientisclassifiedasdevelopingAKIwithinthefirstweekofICUstay.DependingontheintendeduseoftheAKIpredictor,theusermaychoosetoadapttheclassificationthreshold,andevaluatetheeffectonthestatisticsbelow.Thedefaultclassificationthresholdof14.5maximizedbothsensitivityandspecificityinthestudieddatabase.Sensitivity:63.8%Specificity:81.9%PPV:38.0%NPV:92.8%ΔNetbenefitNone:6.8%ΔNetbenefitAll:6.4%Atthechosenclassificationthreshold,AKIpredictorcorrectlyidentifies63.8%ofthepatientswhodevelopedAKIinthestudieddatabaseAtthechosenclassificationthreshold,AKIpredictorcorrectlyidentifies81.9%ofthepatientswhodidnotdevelopAKIinthestudieddatabaseInthestudieddatabase,38.0%ofthepatientswhodevelopedAKIhadapredictedriskabovethechosenclassificationthresholdInthestudieddatabase,92.8%ofthepatientswhodidnotdevelopAKIhadapredictedriskbelowthechosenclassificationthresholdAtthechosenclassificationthreshold,AKIpredictorincreasesthepercentageofcorrectlyidentifiedAKIby6.8%inthestudieddatabase,withoutincreasingfalseclassifications,ascomparedtoconsideringnopatientwilldevelopAKI.OnlyuseaclassificationthresholdthatresultsinaΔNetbenefitNone>0Atthechosenclassificationthreshold,AKIpredictordecreasesthepercentageofmisclassifiedAKIby6.4%inthestudieddatabase,whilekeepingthesamenumberofcorrectclassifications,ascomparedtoconsideringallpatientswilldevelopAKIOnlyuseaclassificationthresholdthatresultsinaΔNetbenefitAll>0Clickonthestatisticsfordetails", "")

  # Click `empty_form` before handing the scraped text back to the caller.
  click_named("empty_form")
  results
}
#data frame
age <- c(50, 70, 80)
baselineSC