R Data Import/Export — Working with Data Formats

Learning Objectives

By the end of this tutorial, you will be able to:

Read and write CSV, TSV, and fixed-width files
Import and export Excel spreadsheets
Work with JSON and XML data
Connect to SQL databases
Fetch data from web APIs

Delimited Files

CSV with readr

library(readr)

# Read CSV
df <- read_csv("data.csv")

# Specify column types
df <- read_csv("data.csv", col_types = cols(
  id = col_integer(),
  name = col_character(),
  date = col_date(),
  value = col_double()
))

# Handle problematic files
df <- read_csv("data.csv",
  skip = 1,                    # Skip first row
  col_names = FALSE,           # No header
  na = c("", "NA", "N/A"),     # NA values
  locale = locale(encoding = "latin1")  # Encoding
)

# Write CSV
write_csv(df, "output.csv")
write_csv(df, "output.csv", na = "")

Base R

# Read
df <- read.csv("data.csv", stringsAsFactors = FALSE)
df <- read.delim("data.tsv")

# Write
write.csv(df, "output.csv", row.names = FALSE)

Excel Files

library(readxl)
library(writexl)

# List sheets
excel_sheets("data.xlsx")

# Read specific sheet
df <- read_excel("data.xlsx", sheet = "Sheet1")
df <- read_excel("data.xlsx", sheet = 2)

# Read range
df <- read_excel("data.xlsx", range = "A1:D100")
df <- read_excel("data.xlsx", range = cell_cols("A:D"))

# Skip rows, rename columns
df <- read_excel("data.xlsx",
  skip = 2,
  col_names = c("id", "name", "value")
)

# Write Excel
write_xlsx(df, "output.xlsx")

# Multiple sheets
write_xlsx(
  list(Sheet1 = df1, Sheet2 = df2),
  "output.xlsx"
)

JSON Data

library(jsonlite)

# Read JSON
data <- fromJSON("data.json")

# Nested JSON
data <- fromJSON("data.json", flatten = TRUE)

# Read JSON URL
data <- fromJSON("https://api.example.com/data")

# Write JSON
json_str <- toJSON(data, pretty = TRUE, auto_unbox = TRUE)
writeLines(json_str, "output.json")

# Parse JSON string
json_str <- '{"name": "Alice", "scores": [95, 87, 92]}'
data <- fromJSON(json_str)

XML Data

library(xml2)

# Read XML
doc <- read_xml("data.xml")

# XPath
nodes <- xml_find_all(doc, "//person/name")
names <- xml_text(nodes)

# Read HTML
html <- read_html("page.html")
titles <- html_elements(html, "h1") |> html_text()

# Write XML
xml_new_root("data") |>
  xml_add_child("item", name = "Alice", age = "30") |>
  write_xml("output.xml")

Database Connections

SQLite

library(DBI)

# Connect
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")

# List tables
dbListTables(con)

# Read table
df <- dbReadTable(con, "users")

# Write table
dbWriteTable(con, "new_table", df, overwrite = TRUE)

# Query
result <- dbGetQuery(con, "SELECT * FROM users WHERE age > 25")

# Parameterized query
result <- dbSendQuery(con, "SELECT * FROM users WHERE age > ?", 25)
df <- dbFetch(result)
dbClearResult(result)

# Disconnect
dbDisconnect(con)

MySQL/PostgreSQL

library(DBI)
library(RMySQL)  # or RPostgres

# Connect
con <- dbConnect(RMySQL::MySQL(),
  dbname = "mydb",
  host = "localhost",
  port = 3306,
  user = "root",
  password = "password"
)

# Query
df <- dbGetQuery(con, "SELECT * FROM users")

# dplyr interface
library(dplyr)
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
tbl(con, "users") |>
  filter(age > 25) |>
  collect()
dbDisconnect(con)

Web APIs

library(httr)
library(jsonlite)

# GET request
response <- GET("https://api.example.com/data")
data <- content(response, "parsed")

# With parameters
response <- GET(
  "https://api.example.com/data",
  query = list(key = "value", page = 1)
)

# POST request
response <- POST(
  "https://api.example.com/data",
  body = list(name = "Alice", age = 30),
  encode = "json"
)

# Authentication
response <- GET(
  "https://api.example.com/data",
  add_headers(Authorization = "Bearer token123")
)

# Handle errors
if (status_code(response) == 200) {
  data <- content(response, "parsed")
} else {
  cat("Error:", status_code(response), "\n")
}

Large Files

# chunked reading
library(readr)
df <- read_csv_chunked("large.csv",
  callback = DataFrameCallback$new(function(x, pos) {
    x[x$value > 100, ]
  }),
  chunk_size = 10000
)

# fst format (fast read/write)
library(fst)
write_fst(df, "data.fst")
df <- read_fst("data.fst")

# data.table (fast reading)
library(data.table)
df <- fread("large.csv")

Practice Exercises

Exercise 1: Multi-Format Reader

Write a function that reads data from CSV, Excel, or JSON based on file extension.

Solution

read_data <- function(file_path) {
  ext <- tools::file_ext(file_path)

  switch(ext,
    csv = read_csv(file_path, show_col_types = FALSE),
    xlsx = , xls = read_excel(file_path),
    json = fromJSON(file_path),
    stop(paste("Unsupported format:", ext))
  )
}

# Test
# read_data("data.csv")
# read_data("data.xlsx")
# read_data("data.json")

Key Takeaways

readr::read_csv() is fast and returns tibbles
readxl::read_excel() handles Excel files cleanly
jsonlite::fromJSON() parses JSON data
DBI package provides database connectivity
httr package handles web API requests
Always specify column types for reliable imports
Use write_csv() with na = "" for clean exports

Next: Learn about R Data Manipulation with dplyr — the grammar of data manipulation.

R Data Import/Export — Working with Data Formats

R Data Import/Export — Working with Data Formats

Learning Objectives

Delimited Files

CSV with readr

Base R

Excel Files

JSON Data

XML Data

Database Connections

SQLite

MySQL/PostgreSQL

Web APIs

Large Files

Practice Exercises

Exercise 1: Multi-Format Reader

Key Takeaways

Need Expert R Programming Help?