R Data Import/Export — Working with Data Formats
Learning Objectives
By the end of this tutorial, you will be able to:
- Read and write CSV, TSV, and fixed-width files
- Import and export Excel spreadsheets
- Work with JSON and XML data
- Connect to SQL databases
- Fetch data from web APIs
Delimited Files
CSV with readr
library(readr)
# Read CSV
df <- read_csv("data.csv")
# Specify column types
df <- read_csv("data.csv", col_types = cols(
id = col_integer(),
name = col_character(),
date = col_date(),
value = col_double()
))
# Handle problematic files
df <- read_csv("data.csv",
skip = 1, # Skip first row
col_names = FALSE, # No header
na = c("", "NA", "N/A"), # NA values
locale = locale(encoding = "latin1") # Encoding
)
# Write CSV
write_csv(df, "output.csv")
write_csv(df, "output.csv", na = "")
Base R
# Read
df <- read.csv("data.csv", stringsAsFactors = FALSE)
df <- read.delim("data.tsv")
# Write
write.csv(df, "output.csv", row.names = FALSE)
Excel Files
library(readxl)
library(writexl)
# List sheets
excel_sheets("data.xlsx")
# Read specific sheet
df <- read_excel("data.xlsx", sheet = "Sheet1")
df <- read_excel("data.xlsx", sheet = 2)
# Read range
df <- read_excel("data.xlsx", range = "A1:D100")
df <- read_excel("data.xlsx", range = cell_cols("A:D"))
# Skip rows, rename columns
df <- read_excel("data.xlsx",
skip = 2,
col_names = c("id", "name", "value")
)
# Write Excel
write_xlsx(df, "output.xlsx")
# Multiple sheets
write_xlsx(
list(Sheet1 = df1, Sheet2 = df2),
"output.xlsx"
)
JSON Data
library(jsonlite)
# Read JSON
data <- fromJSON("data.json")
# Nested JSON
data <- fromJSON("data.json", flatten = TRUE)
# Read JSON URL
data <- fromJSON("https://api.example.com/data")
# Write JSON
json_str <- toJSON(data, pretty = TRUE, auto_unbox = TRUE)
writeLines(json_str, "output.json")
# Parse JSON string
json_str <- '{"name": "Alice", "scores": [95, 87, 92]}'
data <- fromJSON(json_str)
XML Data
library(xml2)
# Read XML
doc <- read_xml("data.xml")
# XPath
nodes <- xml_find_all(doc, "//person/name")
names <- xml_text(nodes)
# Read HTML
html <- read_html("page.html")
titles <- html_elements(html, "h1") |> html_text()
# Write XML
xml_new_root("data") |>
xml_add_child("item", name = "Alice", age = "30") |>
write_xml("output.xml")
Database Connections
SQLite
library(DBI)
# Connect
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
# List tables
dbListTables(con)
# Read table
df <- dbReadTable(con, "users")
# Write table
dbWriteTable(con, "new_table", df, overwrite = TRUE)
# Query
result <- dbGetQuery(con, "SELECT * FROM users WHERE age > 25")
# Parameterized query
result <- dbSendQuery(con, "SELECT * FROM users WHERE age > ?", 25)
df <- dbFetch(result)
dbClearResult(result)
# Disconnect
dbDisconnect(con)
MySQL/PostgreSQL
library(DBI)
library(RMySQL) # or RPostgres
# Connect
con <- dbConnect(RMySQL::MySQL(),
dbname = "mydb",
host = "localhost",
port = 3306,
user = "root",
password = "password"
)
# Query
df <- dbGetQuery(con, "SELECT * FROM users")
# dplyr interface
library(dplyr)
con <- dbConnect(RSQLite::SQLite(), "database.sqlite")
tbl(con, "users") |>
filter(age > 25) |>
collect()
dbDisconnect(con)
Web APIs
library(httr)
library(jsonlite)
# GET request
response <- GET("https://api.example.com/data")
data <- content(response, "parsed")
# With parameters
response <- GET(
"https://api.example.com/data",
query = list(key = "value", page = 1)
)
# POST request
response <- POST(
"https://api.example.com/data",
body = list(name = "Alice", age = 30),
encode = "json"
)
# Authentication
response <- GET(
"https://api.example.com/data",
add_headers(Authorization = "Bearer token123")
)
# Handle errors
if (status_code(response) == 200) {
data <- content(response, "parsed")
} else {
cat("Error:", status_code(response), "\n")
}
Large Files
# chunked reading
library(readr)
df <- read_csv_chunked("large.csv",
callback = DataFrameCallback$new(function(x, pos) {
x[x$value > 100, ]
}),
chunk_size = 10000
)
# fst format (fast read/write)
library(fst)
write_fst(df, "data.fst")
df <- read_fst("data.fst")
# data.table (fast reading)
library(data.table)
df <- fread("large.csv")
Practice Exercises
Exercise 1: Multi-Format Reader
Write a function that reads data from CSV, Excel, or JSON based on file extension.
Solution
read_data <- function(file_path) {
ext <- tools::file_ext(file_path)
switch(ext,
csv = read_csv(file_path, show_col_types = FALSE),
xlsx = , xls = read_excel(file_path),
json = fromJSON(file_path),
stop(paste("Unsupported format:", ext))
)
}
# Test
# read_data("data.csv")
# read_data("data.xlsx")
# read_data("data.json")
Key Takeaways
readr::read_csv()is fast and returns tibblesreadxl::read_excel()handles Excel files cleanlyjsonlite::fromJSON()parses JSON dataDBIpackage provides database connectivityhttrpackage handles web API requests- Always specify column types for reliable imports
- Use
write_csv()withna = ""for clean exports
Next: Learn about R Data Manipulation with dplyr — the grammar of data manipulation.