

示例工具:R x64 3.5.3、RStudio
本文讲解内容:数据概览
适用范围:数据导入、导出、概览

# Usage of read.table (abbreviated signature, for reference only; it is
# not meant to be executed as written):
#   read.table(file, header = FALSE, sep = "", quote = "\"'",
#              row.names, col.names, nrows = -1, skip = 0,
#              encoding = "unknown", text, skipNul = FALSE)

# Import a comma-separated text file; header = TRUE takes the column
# names from the first line of the file.
df <- read.table(
  "C:\\Users\\尚天强\\Desktop\\R数据集\\data.txt",
  sep = ",",
  header = TRUE
)

# Print the imported data frame.
df
# Usage of read.csv (abbreviated signature, for reference only; it is
# not meant to be executed as written):
#   read.csv(file, header = TRUE, sep = ",", quote = "\"",
#            dec = ".", fill = TRUE, comment.char = "")

# Import a CSV file using the read.csv defaults shown above.
df1 <- read.csv("C:\\Users\\尚天强\\Desktop\\R数据集\\data.csv")

# Print the imported data frame.
df1
# Install the readxl package only when it is not available yet, so that
# re-running the script does not reinstall it every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

# Attach the package so read_excel() can be called directly.
library(readxl)

# Import the Excel workbook.
df2 <- read_excel("C:\\Users\\尚天强\\Desktop\\R数据集\\data.xlsx")

# Print the imported table.
df2
生成数据表使用data.frame命令,与之前pandas生成数据表类似,生成的数据表如下。
# Build the sample data table by hand. SAL holds NA where the salary is
# missing.
data <- data.frame(
  ID = c(
    "c001", "c002", "c003", "c004", "c005",
    "c006", "c007", "c008", "c009", "c0010"
  ),
  NAME = c(
    "Rmesh", "Khilan", "Kaushik", "Chaitali", "Hardik",
    "Komal", "Tom", "Muffy", "Susan", "Kevin"
  ),
  AGE = c(23, 20, 23, 25, 27, 24, 26, 31, 26, 30),
  ADDRESS = c(
    "Ahmed", "Delhi", "Kota", "Mumbai", "Bhopal",
    "MP-A", "MP-B", "Indore", "JP-No.1", "JP-No.2"
  ),
  SAL = c(2000, 1500, 2000, NA, 8500, NA, 5500, 9500, NA, 900)
)

# Dimensions of the table: number of rows, number of columns.
dim(data)

# Open the table in the data editor. fix() needs an interactive session
# and assigns any edits back to `data`, so it is skipped when the script
# runs non-interactively (e.g. via Rscript).
if (interactive()) {
  fix(data)
}

# Storage type of each column.
typeof(data$ID)
typeof(data$NAME)
typeof(data$AGE)
typeof(data$ADDRESS)
typeof(data$SAL)

# Element-wise missing-value check over the whole table.
is.na(data)
查看具体某一列是否有空值,可以单独索引那一列,然后使用is.na函数。
# Missing-value flags for a single column.
is.na(data$SAL)

# Count how many values in that column are missing.
sum(is.na(data$SAL))

# Distinct rows of the table (duplicate rows removed).
unique(data)

# Distinct values of the SAL column.
unique(data$SAL)

# Range of AGE (minimum and maximum).
range(data$AGE)

# Minimum value.
min(data$AGE)

# Row position of the minimum value.
which.min(data$AGE)

# Maximum value.
max(data$AGE)

# Row position of the maximum value.
which.max(data$AGE)

# Column names of the table.
names(data)

# First 5 rows.
head(data, n = 5)
tail()函数与head()函数相反,用来查看数据表中后N行的数据,默认显示后6行数据,可以通过设置参数n的值来确定查看的行数。
# Last 3 rows.
tail(data, n = 3)
1.导出为csv文件
# Export the table as a CSV file, leaving out the row-name column.
write.csv(
  data,
  file = "C:\\Users\\尚天强\\Desktop\\out_table.csv",
  row.names = FALSE
)

# Export the table as a comma-separated text file, leaving out the
# row-name column.
write.table(
  data,
  file = "C:\\Users\\尚天强\\Desktop\\out_txt.txt",
  sep = ",",
  row.names = FALSE
)

