R语言读CSV、txt文件方式以及read.table read.csv 和readr（大数据读取包） ...

OStack程序员社区-中国程序员成长平台 › 门户 › 编程› R语言›R语言教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

首先准备测试数据（mtcars）

分别保存为 CSV 和 TXT 两种格式

read.table 以默认参数读取 CSV（×，效果不理想）与 TXT（√，效果理想）

[plain]view
plain copy

<span style="font-size:14px;">①  

> test<-read.table("C:/Users/admin/Desktop/test.txt",header = F)  

Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,  :   

  line 1 did not have 12 elements  

> test<-read.table("C:/Users/admin/Desktop/test.txt")  

> str(test)  

'data.frame':   32 obs. of  11 variables:  

 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...  

 $ cyl : int  6 6 4 6 8 6 8 4 4 6 ...  

 $ disp: num  160 160 108 258 360 ...  

 $ hp  : int  110 110 93 110 175 105 245 62 95 123 ...  

 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...  

 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...  

 $ qsec: num  16.5 17 18.6 19.4 17 ...  

 $ vs  : int  0 0 1 1 0 1 0 1 1 1 ...  

 $ am  : int  1 1 1 0 0 0 0 0 0 0 ...  

 $ gear: int  4 4 4 3 3 3 3 4 4 4 ...  

 $ carb: int  4 4 1 1 2 1 4 2 2 4 ...  

> attributes(test)  

$names  

 [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear" "carb"  

$class  

[1] "data.frame"  

$row.names  

 [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"          "Hornet 4 Drive"       

 [5] "Hornet Sportabout"   "Valiant"             "Duster 360"          "Merc 240D"            

 [9] "Merc 230"            "Merc 280"            "Merc 280C"           "Merc 450SE"           

[13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood"  "Lincoln Continental"  

[17] "Chrysler Imperial"   "Fiat 128"            "Honda Civic"         "Toyota Corolla"       

[21] "Toyota Corona"       "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"           

[25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"       "Lotus Europa"         

[29] "Ford Pantera L"      "Ferrari Dino"        "Maserati Bora"       "Volvo 142E"  </span>



[plain] view plain copy
 


②效果不理想：虽然返回了 data.frame，但没有按逗号分列，得不到预期的数据结构（只有 2 个变量）  
> test<-read.table("C:/Users/admin/Desktop/test.csv")  
#变量类型识别遗漏  
> str(test)  
'data.frame':   33 obs. of  2 variables:  
 $ V1: Factor w/ 33 levels "","AMC Javelin",..: 1 19 20 6 14 15 32 8 22 21 ...  
 $ V2: Factor w/ 33 levels ",\"mpg\",\"cyl\",\"disp\",\"hp\",\"drat\",\"wt\",\"qsec\",\"vs\",\"am\",\"gear\",\"carb\"",..: 1 20 21 25 23 16 15 5 27 26 ...  
> attributes(test)  
$names  
[1] "V1" "V2"  
  
$class  
[1] "data.frame"  
  
$row.names  
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33  

修改后：显式指定 header 和 sep 参数（还可以根据自己的需要设置其他参数）
③

> test<-read.table("C:/Users/admin/Desktop/test.csv",header = T,sep=",")
> str(test)
'data.frame':	32 obs. of  12 variables:
 $ X   : Factor w/ 32 levels "AMC Javelin",..: 18 19 5 13 14 31 7 21 20 22 ...
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : int  6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : int  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : int  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : int  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: int  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: int  4 4 1 1 2 1 4 2 2 4 ...
> attributes(test)
$names
 [1] "X"    "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear" "carb"

$class
[1] "data.frame"

$row.names
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32

④效果同③：read.csv 是 read.table 的封装（默认 header = TRUE、sep = ","）；read.table 是读取矩形格子状数据最为便利的方式

> test<-read.csv("C:/Users/admin/Desktop/test.csv",head=T,sep=",")
> str(test)
'data.frame':	32 obs. of  12 variables:
 $ X   : Factor w/ 32 levels "AMC Javelin",..: 18 19 5 13 14 31 7 21 20 22 ...
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : int  6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : int  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : int  0 0 1 1 0 1 0 1 1 1 ...
 $ am  : int  1 1 1 0 0 0 0 0 0 0 ...
 $ gear: int  4 4 4 3 3 3 3 4 4 4 ...
 $ carb: int  4 4 1 1 2 1 4 2 2 4 ...
> attributes(test)
$names
 [1] "X"    "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear" "carb"

$class
[1] "data.frame"

$row.names
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32

⑤：用 read.csv 读取 txt 文件：文件以空格分隔，而 sep="," 与之不匹配，导致数据结构丢失，整行被读成 1 个变量（1 variable）

> test<-read.csv("C:/Users/admin/Desktop/test.txt",head=T,sep=",")
> str(test)
'data.frame':	32 obs. of  1 variable:
 $ mpg.cyl.disp.hp.drat.wt.qsec.vs.am.gear.carb: Factor w/ 32 levels "AMC Javelin 15.2 8 304 150 3.15 3.435 17.3 0 0 3 2",..: 18 19 5 13 14 31 7 21 20 22 ...
> attributes(text)
NULL
> attributes(test)
$names
[1] "mpg.cyl.disp.hp.drat.wt.qsec.vs.am.gear.carb"

$class
[1] "data.frame"

$row.names
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32

⑥使用 readr 包中的 read_csv 读取的情况，其适合读取大数据文件

[plain]view
plain copy

> test<-read_csv("C:/Users/admin/Desktop/test.csv")  

Parsed with column specification:  

cols(  

  X1 = col_character(),  

  mpg = col_double(),  

  cyl = col_integer(),  

  disp = col_double(),  

  hp = col_integer(),  

  drat = col_double(),  

  wt = col_double(),  

  qsec = col_double(),  

  vs = col_integer(),  

  am = col_integer(),  

  gear = col_integer(),  

  carb = col_integer()  

)  

Warning message:  

Missing column names filled in: 'X1' [1]   

> test  

# A tibble: 32 × 12  

                  X1   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb  

               <chr> <dbl> <int> <dbl> <int> <dbl> <dbl> <dbl> <int> <int> <int> <int>  

1          Mazda RX4  21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4  

2      Mazda RX4 Wag  21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4  

3         Datsun 710  22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1  

4     Hornet 4 Drive  21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1  

5  Hornet Sportabout  18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2  

6            Valiant  18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1  

7         Duster 360  14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4  

8          Merc 240D  24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2  

9           Merc 230  22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2  

10          Merc 280  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4  

# ... with 22 more rows  

> str(test)  

Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   32 obs. of  12 variables:  

 $ X1  : chr  "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...  

 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...  

 $ cyl : int  6 6 4 6 8 6 8 4 4 6 ...  

 $ disp: num  160 160 108 258 360 ...  

 $ hp  : int  110 110 93 110 175 105 245 62 95 123 ...  

 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...  

 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...  

 $ qsec: num  16.5 17 18.6 19.4 17 ...  

 $ vs  : int  0 0 1 1 0 1 0 1 1 1 ...  

 $ am  : int  1 1 1 0 0 0 0 0 0 0 ...  

 $ gear: int  4 4 4 3 3 3 3 4 4 4 ...  

 $ carb: int  4 4 1 1 2 1 4 2 2 4 ...  

 - attr(*, "spec")=List of 2  

  ..$ cols   :List of 12  

  .. ..$ X1  : list()  

  .. .. ..- attr(*, "class")= chr  "collector_character" "collector"  

  ……（其余 str 输出省略）  

> attributes(test)  

$class  

[1] "tbl_df"     "tbl"        "data.frame"  

$row.names  

 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32  

$names  

 [1] "X1"   "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear" "carb"  

$spec  

cols(  

  X1 = col_character(),  

  mpg = col_double(),  

  cyl = col_integer(),  

  disp = col_double(),  

  hp = col_integer(),  

  drat = col_double(),  

  wt = col_double(),  

  qsec = col_double(),  

  vs = col_integer(),  

  am = col_integer(),  

  gear = col_integer(),  

  carb = col_integer()  

)  
 
鲜花

握手

雷人

路过

鸡蛋