# Storing & Loading Data
# Writing a data frame to a file.
# First, two small example data frames (with identical contents) that the
# write examples below use as input:
dfA <- data.frame(
  a = c(10, 20, 30),
  b = c("RA", "RB", "RC")
)
dfB <- data.frame(
  a = c(10, 20, 30),
  b = c("RA", "RB", "RC")
)
# Write a .csv file with base R. `row.names = FALSE` keeps the automatic row
# names (1, 2, 3) out of the file; with the default (`row.names = TRUE`) the
# header line has one field fewer than the data lines, which misaligns the
# columns in spreadsheets and in most other CSV readers.
write.table(dfA, "myfile.csv", sep = ",", row.names = FALSE)
# Alternative that keeps non-ASCII text (e.g. Cyrillic characters) readable in
# Excel: readr's write_excel_csv() writes UTF-8 with a byte order mark (BOM).
# Note that this call overwrites the file written by write.table() above.
library(readr)
write_excel_csv(x = dfA, file = "myfile.csv", col_names = TRUE)
# Write an .xlsx file in a single call (openxlsx package):
library(openxlsx)
write.xlsx(dfA, file = "myfile.xlsx")
# Write several sheets into one file (same package): create a workbook object,
# then add and fill one worksheet per data frame, and finally save it.
# The string given to createWorkbook() is its `creator` argument; it has no
# influence on the file name, which is set in saveWorkbook().
wb <- createWorkbook(creator = "whatever")
addWorksheet(wb, sheetName = "alpha")
writeData(wb, sheet = "alpha", x = dfA)
addWorksheet(wb, sheetName = "beta")
writeData(wb, sheet = "beta", x = dfB)
# overwrite = TRUE replaces the myfile.xlsx written by write.xlsx() above.
saveWorkbook(wb, file = "myfile.xlsx", overwrite = TRUE)
# Reading files (datasets) into R.
# Read in a .csv file.
# Simply: `header = TRUE` makes the first line supply the column names.
# Without it, read.table() treats the header as a data row (names become
# V1, V2, ... and numeric columns are read as text), unless the first line
# happens to have one field fewer than the remaining lines.
dfData <- read.table("myfile.csv", header = TRUE, sep = ",")
# With more options spelled out:
#   quote        - character that encloses text fields
#   dec          - decimal mark; it must differ from `sep`, so "." is used
#   fill         - pad rows that have too few fields
#   comment.char - "" disables comment handling (a "#" is kept as data)
#   skip         - lines to skip before reading; 0 keeps the header line
#   nrows        - maximum number of data rows to read
#   fileEncoding - "UTF-8-BOM" strips a leading byte order mark if present
#                  (e.g. in files written by readr::write_excel_csv()), so it
#                  does not end up in the first column name
dfData <- read.csv("myfile.csv", header = TRUE, sep = ",", quote = "\"",
                   dec = ".", fill = TRUE, comment.char = "", skip = 0,
                   nrows = 4, fileEncoding = "UTF-8-BOM")
# Read a single sheet of an .xlsx file (readxl package):
library(readxl)
dfData <- read_excel(path = "myfile.xlsx", sheet = "alpha")
# Read a .dta file (haven package).
# NOTE: this example expects a file 'myfile.dta' in the working directory.
# No such file is provided here, so the call cannot be run as is.
library(haven)
myData <- read_dta(file = "myfile.dta")
# There are many packages to download data directly from some
# databases/websites. e.g. 'quantmod' to download data from yahoo or FRED
# database, or 'imfr' package for downloading data from IMF (more on them
# later)
# Download a file from a URL.
# NOTE: download.file() belongs to base R (package utils) and "libcurl" is one
# of its built-in methods, so the RCurl package is not needed for this call.
# The library() line is kept in case other code relies on RCurl.
library(RCurl)
# download.file() does not create missing directories, so make sure the target
# folder exists first (no warning if it is already there).
dir.create("./Data", showWarnings = FALSE, recursive = TRUE)
download.file(
  url = "https://api.census.gov/data/1994/cps/basic/jan?tabulate=weight(PWCMPWGT)&col+PEEDUCA&row+PEMLR",
  destfile = "./Data/1994jan",
  method = "libcurl"
)
# In my experience, due to type conversion issues, it's much easier to work
# with csv files than with excel (xlsx) files in R. So you might want to
# convert any xlsx files you need first manually into csv files. (You can also
# do basic adjustments (like deleting unnecessary rows) manually in the
# xlsx/csv file, though this is discouraged, as you would like others to be
# able to replicate your whole data-manipulation process and therefore verify
# your work.)
# If you have to load an xlsx file and some of its columns are not recognized
# as numeric by R (but as factors), this can be useful.
# vapply() always returns a logical vector here, whereas the return type of
# sapply() depends on its input; that makes it a reliable column selector.
indx <- vapply(dfData, is.factor, logical(1))
# Go through as.character() first: as.numeric() applied directly to a factor
# returns the internal integer codes rather than the values shown as labels.
dfData[indx] <- lapply(
  dfData[indx],
  function(x) as.numeric(as.character(x))
)
# You might also need to convert columns of type 'character' to type 'numeric'
# (here it's done for all but the first column, say because it contains a list
# of country names, which are supposed to stay strings).
# List-style indexing `dfData[-1]` is used instead of `dfData[, -1]`: on a
# plain data.frame with only two columns the matrix-style form drops the one
# remaining column to a bare vector, so lapply() would iterate over its
# elements instead of over columns and the assignment would corrupt the data.
# Entries that are not valid numbers become NA (with a warning).
dfData[-1] <- lapply(dfData[-1], as.numeric)