R File Handling — Reading and Writing Files

R BasicsFile HandlingFree Lesson

Advertisement

R File Handling — Reading and Writing Files

Learning Objectives

By the end of this tutorial, you will be able to:

  • Read and write CSV, TSV, and delimited files
  • Import and export Excel files
  • Work with JSON data
  • Connect to databases
  • Handle file paths and directories

Working Directory

# Check current directory
getwd()
# [1] "C:/Users/YourName/Documents"

# Set working directory
setwd("C:/my_project/data")

# List files
list.files()
list.files(path = "data", pattern = "*.csv")

CSV Files

Base R

# Read CSV
df <- read.csv("data.csv")
df <- read.csv("data.csv", stringsAsFactors = FALSE)
df <- read.csv("data.csv", na.strings = c("", "NA", "N/A"))

# Write CSV
write.csv(df, "output.csv", row.names = FALSE)

# TSV (tab-separated)
df <- read.delim("data.tsv")
write.delim(df, "output.tsv", row.names = FALSE)

readr Package (Tidyverse)

library(readr)

# Read CSV (faster, returns tibble)
df <- read_csv("data.csv")
df <- read_csv("data.csv", col_types = cols(
  name = col_character(),
  age = col_integer(),
  score = col_double()
))

# Write CSV
write_csv(df, "output.csv")
write_csv(df, "output.csv", na = "")

# TSV
df <- read_tsv("data.tsv")
write_tsv(df, "output.tsv")

# Read with specific locale
df <- read_csv("data.csv", locale = locale(encoding = "latin1"))

Excel Files

readxl Package

library(readxl)

# Read Excel
df <- read_excel("data.xlsx")
df <- read_excel("data.xlsx", sheet = "Sheet1")
df <- read_excel("data.xlsx", sheet = 2)
df <- read_excel("data.xlsx", range = "A1:D10")
df <- read_excel("data.xlsx", col_names = FALSE)

# List sheets
excel_sheets("data.xlsx")

# Write Excel
library(writexl)
write_xlsx(df, "output.xlsx")
write_xlsx(list(Sheet1 = df1, Sheet2 = df2), "output.xlsx")

JSON Files

jsonlite Package

library(jsonlite)

# Read JSON
data <- fromJSON("data.json")
data <- fromJSON("data.json", flatten = TRUE)

# Write JSON
toJSON(data, pretty = TRUE, auto_unbox = TRUE) |>
  write("output.json")

# Parse JSON string
json_str <- '{"name": "Alice", "age": 30}'
fromJSON(json_str)

Database Files

DBI Package

library(DBI)

# SQLite
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
df <- dbReadTable(con, "table_name")
dbWriteTable(con, "new_table", df)
dbGetQuery(con, "SELECT * FROM table_name WHERE age > 25")
dbDisconnect(con)

# RMySQL
con <- dbConnect(RMySQL::MySQL(),
  dbname = "mydb",
  host = "localhost",
  user = "root",
  password = "password"
)
df <- dbGetQuery(con, "SELECT * FROM users")
dbDisconnect(con)

# dplyr interface
library(dplyr)
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
tbl(con, "users") |>
  filter(age > 25) |>
  collect()
dbDisconnect(con)

Other Formats

RDS Files

# Save R object
saveRDS(df, "data.rds")

# Load R object
df <- readRDS("data.rds")

# Save multiple objects
save(df1, df2, file = "data.RData")
load("data.RData")

SAS, SPSS, Stata

library(haven)

# SAS
df <- read_sas("data.sas7bdat")
write_sas(df, "output.sas7bdat")

# SPSS
df <- read_spss("data.sav")
write_sav(df, "output.sav")

# Stata
df <- read_dta("data.dta")
write_dta(df, "output.dta")

File Paths

# File path construction
file.path("data", "raw", "data.csv")
# [1] "data/raw/data.csv"

# List files with pattern
list.files("data", pattern = "\\.csv$")

# File information
file.exists("data.csv")
file.size("data.csv")
file.info("data.csv")

# Create directory
dir.create("output", recursive = TRUE)

# Copy, move, delete
file.copy("data.csv", "backup/data.csv")
file.rename("old.csv", "new.csv")
file.remove("temp.csv")

Practice Exercises

Exercise 1: CSV Reader

Write a function that reads a CSV file and returns a summary of each column.

Solution

library(readr)

summarize_csv <- function(file_path) {
  df <- read_csv(file_path, show_col_types = FALSE)
  lapply(df, summary)
}

# Test
# summarize_csv("mtcars.csv")

Key Takeaways

  • read_csv() from readr is faster and returns tibbles
  • read_excel() from readxl handles Excel files
  • fromJSON() from jsonlite parses JSON
  • DBI package connects to databases
  • saveRDS() and readRDS() save/load R objects
  • Always use row.names = FALSE when writing CSV
  • Use file.path() for cross-platform file paths

Next: Learn about R Error Handling — tryCatch and debugging.

Advertisement

Need Expert R Programming Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement