This is the program
Loading packages
# Global knitr chunk options: echo the code, cache chunk results, and
# suppress messages and warnings in the rendered output.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE, message = FALSE, warning = FALSE)
# Packages are attached in the order listed. A package attached later masks
# same-named functions from packages attached earlier, so keep this order
# stable.
library(tidyverse)
library(caret)
library(GGally)
library(lattice)
library(corrplot)
library(factoextra)
library(FactoMineR)
# magrittr supplies additional pipe operators such as %<>%.
library(magrittr)
# Make the black-and-white theme the default for ggplot2 plots.
theme_set(theme_bw())
# Fix the random seed so that stochastic steps are reproducible.
set.seed(181019)
Data loading
# Read the training and test sets; both paths are relative to the current
# working directory.
train <- readr::read_csv(file = "train.csv")
test <- readr::read_csv(file = "test.csv")
Missing values
# Drop predictors that are mostly missing.
# A column is removed when its share of NA values in `train` is strictly
# greater than `missing_threshold`.
missing_threshold <- 0.4

# Named logical vector, one entry per predictor column of `train`:
# TRUE when the column's proportion of NA values exceeds the threshold.
# SalePrice is excluded here so it is never a candidate for removal.
is_too_scarce <- map_lgl(
  select(train, -SalePrice),
  ~ mean(is.na(.x)) > missing_threshold
)

# Names of the predictors that are kept.
not_too_scarce <- names(is_too_scarce)[!is_too_scarce]

# all_of() tells tidyselect that `not_too_scarce` is an external character
# vector of column names. Passing it bare is ambiguous (a data column with
# that name would win) and is deprecated. all_of() also fails loudly if a
# listed column is absent from the data.
train <- select(train, SalePrice, all_of(not_too_scarce))
test <- select(test, all_of(not_too_scarce))
Preprocessing
# RANN must be installed; if it is not, run install.packages("RANN") once.
# NOTE(review): presumably attached for the "knnImpute" step -- confirm.
library(RANN)

# Fit the preprocessing on the training predictors only (SalePrice is
# excluded from the data the transformation is estimated on).
imputedData <- preProcess(
  select(train, -SalePrice),
  method = c("center", "scale", "knnImpute", "nzv", "YeoJohnson")
)

# Apply the transformation fitted above to the test set and to the
# training set.
testTrans <- predict(object = imputedData, newdata = test)
trainTrans <- predict(object = imputedData, newdata = train)
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…