# Storing & Loading Data

# The following commands are useful for writing a dataframe to a file:

# Two small example data frames with identical content: a numeric column `a`
# and a label column `b`. They are written to files further below.
dfA <- data.frame(
  a = c(10, 20, 30),
  b = c("RA", "RB", "RC")
)
dfB <- dfA


# Write .csv files:

# Base R writer; the comma separator makes the output comma-delimited.
# With the default settings the row names are written as well.
write.table(x = dfA, file = "myfile.csv", sep = ",")

# To preserve special encoding (e.g. Cyrillic characters), use this:
library(readr)
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
write_excel_csv(x = dfA, file = "myfile.csv", col_names = TRUE)



# Write .xlsx files:

library(openxlsx)
# One-call export of a single data frame to an Excel workbook.
write.xlsx(x = dfA, file = "myfile.xlsx")

# Write several sheets (same package):

# Start from an empty in-memory workbook. The string passed here is stored as
# the workbook's creator metadata; it is not a file name. The output file is
# chosen only when the workbook is saved.
wb <- createWorkbook(creator = "whatever")

# Each sheet has to exist before data can be written to it.
addWorksheet(wb, sheetName = "alpha")
addWorksheet(wb, sheetName = "beta")

writeData(wb, sheet = "alpha", x = dfA)
writeData(wb, sheet = "beta", x = dfB)

# overwrite = TRUE replaces an existing file at that path.
saveWorkbook(wb, file = "myfile.xlsx", overwrite = TRUE)
# The following commands are useful for reading-in files (datasets) into R:


# Read in .csv file:

# simply:
# header = TRUE is required so that the first line is used for the column
# names. Without it, read.table() only detects a header when the first line
# has one field fewer than the rest (i.e. when row names were written), and a
# plain header line would otherwise be read as a data row.
# If the file starts with a UTF-8 byte-order mark (as Excel-oriented writers
# produce), additionally pass fileEncoding = "UTF-8-BOM".
dfData <- read.table("myfile.csv", header = TRUE, sep = ",")
# with more options:
#   header       - take the column names from the first line that is read
#   sep / dec    - field separator and decimal mark; they must differ, so with
#                  comma-separated fields the decimal mark is "."
#   quote        - character(s) that delimit quoted fields
#   fill         - pad short rows with NA instead of failing
#   comment.char - "" disables comment handling
#   skip         - number of lines to discard BEFORE the header; keep it at 0
#                  unless the file has leading lines that are not data
#   nrows        - maximum number of data rows to read
dfData <- read.csv(
  "myfile.csv",
  header = TRUE, sep = ",", quote = "\"", dec = ".",
  fill = TRUE, comment.char = "", skip = 0, nrows = 4
)


# Read in .xlsx file:

library(readxl)
# Load one sheet, selected by name, from the workbook.
dfData <- read_excel(path = "myfile.xlsx", sheet = "alpha")
# Read in .dta file:

library(haven)
# 'myfile.dta' is not shipped with this script, so the read is only attempted
# when the file is present; an unconditional call would stop the script here.
if (file.exists("myfile.dta")) {
  myData <- read_dta("myfile.dta")
} else {
  message("Skipping read_dta(): 'myfile.dta' was not found in the working directory.")
}


# There are many packages to download data directly from some
# databases/websites.  e.g. 'quantmod' to download data from yahoo or FRED
# database, or 'imfr' package for downloading data from IMF (more on them
# later)


# Download file from some url:
library(RCurl)

# download.file() does not create missing directories, so make sure the target
# folder exists before downloading into it.
if (!dir.exists("./Data")) {
  dir.create("./Data", recursive = TRUE)
}

download.file(
  url = "https://api.census.gov/data/1994/cps/basic/jan?tabulate=weight(PWCMPWGT)&col+PEEDUCA&row+PEMLR",
  destfile = "./Data/1994jan",
  method = "libcurl"
)
# In my experience, due to type conversion issues, it's much easier to work
# with csv files than with excel (xlsx) files in R.  So you might want to
# convert any xlsx files you need first manually into csv files.  (You can also
# do basic adjustments (like deleting unnecessary rows) manually in the
# xlsx/csv file, though this is discouraged, as you would like others to be
# able to replicate your whole data-manipulation process and therefore verify
# your work.)

# If you have to load an xlsx file and some of its columns are not recognized
# as numeric by R (but as a factor), this can be useful:

# vapply() always returns a logical vector (one flag per column), even when
# the data frame has no columns, whereas sapply() may return a list then.
indx <- vapply(dfData, is.factor, logical(1))
# Go through as.character() first: as.numeric() applied directly to a factor
# would return the internal level codes instead of the displayed values.
dfData[indx] <- lapply(dfData[indx], function(x) as.numeric(as.character(x)))

# You might also need to convert columns of type 'character' to type 'numeric'.
# Here it's done for all but the first column, say because it contains a list
# of country names, which are supposed to stay strings.
# List-style indexing (dfData[-1]) is used instead of dfData[, -1]: with only
# two columns, the matrix-style form on a plain data.frame collapses to a bare
# vector, and lapply() would then iterate over single values, not columns.
dfData[-1] <- lapply(dfData[-1], as.numeric)