R File Handling — Reading and Writing Files
Learning Objectives
By the end of this tutorial, you will be able to:
- Read and write CSV, TSV, and delimited files
- Import and export Excel files
- Work with JSON data
- Connect to databases
- Handle file paths and directories
Working Directory
# Check current directory
getwd()
# [1] "C:/Users/YourName/Documents"
# Set working directory
setwd("C:/my_project/data")
# List files
list.files()
list.files(path = "data", pattern = "*.csv")
CSV Files
Base R
# Read CSV
df <- read.csv("data.csv")
df <- read.csv("data.csv", stringsAsFactors = FALSE)
df <- read.csv("data.csv", na.strings = c("", "NA", "N/A"))
# Write CSV
write.csv(df, "output.csv", row.names = FALSE)
# TSV (tab-separated)
df <- read.delim("data.tsv")
write.delim(df, "output.tsv", row.names = FALSE)
readr Package (Tidyverse)
library(readr)
# Read CSV (faster, returns tibble)
df <- read_csv("data.csv")
df <- read_csv("data.csv", col_types = cols(
name = col_character(),
age = col_integer(),
score = col_double()
))
# Write CSV
write_csv(df, "output.csv")
write_csv(df, "output.csv", na = "")
# TSV
df <- read_tsv("data.tsv")
write_tsv(df, "output.tsv")
# Read with specific locale
df <- read_csv("data.csv", locale = locale(encoding = "latin1"))
Excel Files
readxl Package
library(readxl)
# Read Excel
df <- read_excel("data.xlsx")
df <- read_excel("data.xlsx", sheet = "Sheet1")
df <- read_excel("data.xlsx", sheet = 2)
df <- read_excel("data.xlsx", range = "A1:D10")
df <- read_excel("data.xlsx", col_names = FALSE)
# List sheets
excel_sheets("data.xlsx")
# Write Excel
library(writexl)
write_xlsx(df, "output.xlsx")
write_xlsx(list(Sheet1 = df1, Sheet2 = df2), "output.xlsx")
JSON Files
jsonlite Package
library(jsonlite)
# Read JSON
data <- fromJSON("data.json")
data <- fromJSON("data.json", flatten = TRUE)
# Write JSON
toJSON(data, pretty = TRUE, auto_unbox = TRUE) |>
write("output.json")
# Parse JSON string
json_str <- '{"name": "Alice", "age": 30}'
fromJSON(json_str)
Database Files
DBI Package
library(DBI)
# SQLite
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
df <- dbReadTable(con, "table_name")
dbWriteTable(con, "new_table", df)
dbGetQuery(con, "SELECT * FROM table_name WHERE age > 25")
dbDisconnect(con)
# RMySQL
con <- dbConnect(RMySQL::MySQL(),
dbname = "mydb",
host = "localhost",
user = "root",
password = "password"
)
df <- dbGetQuery(con, "SELECT * FROM users")
dbDisconnect(con)
# dplyr interface
library(dplyr)
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
tbl(con, "users") |>
filter(age > 25) |>
collect()
dbDisconnect(con)
Other Formats
RDS Files
# Save R object
saveRDS(df, "data.rds")
# Load R object
df <- readRDS("data.rds")
# Save multiple objects
save(df1, df2, file = "data.RData")
load("data.RData")
SAS, SPSS, Stata
library(haven)
# SAS
df <- read_sas("data.sas7bdat")
write_sas(df, "output.sas7bdat")
# SPSS
df <- read_spss("data.sav")
write_sav(df, "output.sav")
# Stata
df <- read_dta("data.dta")
write_dta(df, "output.dta")
File Paths
# File path construction
file.path("data", "raw", "data.csv")
# [1] "data/raw/data.csv"
# List files with pattern
list.files("data", pattern = "\\.csv$")
# File information
file.exists("data.csv")
file.size("data.csv")
file.info("data.csv")
# Create directory
dir.create("output", recursive = TRUE)
# Copy, move, delete
file.copy("data.csv", "backup/data.csv")
file.rename("old.csv", "new.csv")
file.remove("temp.csv")
Practice Exercises
Exercise 1: CSV Reader
Write a function that reads a CSV file and returns a summary of each column.
Solution
library(readr)
summarize_csv <- function(file_path) {
df <- read_csv(file_path, show_col_types = FALSE)
lapply(df, summary)
}
# Test
# summarize_csv("mtcars.csv")
Key Takeaways
read_csv()from readr is faster and returns tibblesread_excel()from readxl handles Excel filesfromJSON()from jsonlite parses JSONDBIpackage connects to databasessaveRDS()andreadRDS()save/load R objects- Always use
row.names = FALSEwhen writing CSV - Use
file.path()for cross-platform file paths
Next: Learn about R Error Handling — tryCatch and debugging.