Data

Author

Rami Krispin

Published

July 3, 2025

In this notebook, we will pull the required time series data from the EIA API and reformat it.

The EIA API is a great resource for time series data. It provides access to various types of time series data from the energy industry. We will use the EIAapi library to pull the following time series:



The EIA website



Load the Libraries

We will use the EIAapi library to pull the required time series from the API and use the plotly library to plot the data, and the tsibble library to reformat the data into time series objects.

library(EIAapi)
library(plotly)
library(tsibble)

API Settings

api_key <- Sys.getenv("EIA_API_KEY")

Number of Natural Gas Consumers in California

Series details:

  • Series description: Total number of natural gas consumers in California
  • Area: California
  • Process: Number of Residential Consumers
  • API dashboard link
  • API path: ‘natural-gas/cons/num/data/’
  • Query schema:
{
    "frequency": "annual",
    "data": [
        "value"
    ],
    "facets": {
        "duoarea": [
            "SCA"
        ],
        "process": [
            "VN3"
        ]
    },
    "start": null,
    "end": null,
    "sort": [
        {
            "column": "period",
            "direction": "desc"
        }
    ],
    "offset": 0,
    "length": 5000
}
ts_1 <- eia_get(
        api_key = api_key,
        api_path = "natural-gas/cons/num/data/",
        # frequency = "monthly",
        data = "value",
        facets = list(duoarea = "SCA", process = "VN3") 
    ) |>
    dplyr::arrange(period)

head(ts_1)
  period duoarea  area-name product product-name process
1   1986     SCA CALIFORNIA    EPG0  Natural Gas     VN3
2   1987     SCA CALIFORNIA    EPG0  Natural Gas     VN3
3   1988     SCA CALIFORNIA    EPG0  Natural Gas     VN3
4   1989     SCA CALIFORNIA    EPG0  Natural Gas     VN3
5   1990     SCA CALIFORNIA    EPG0  Natural Gas     VN3
6   1991     SCA CALIFORNIA    EPG0  Natural Gas     VN3
                     process-name       series
1 Number of Residential Consumers NA1501_SCA_8
2 Number of Residential Consumers NA1501_SCA_8
3 Number of Residential Consumers NA1501_SCA_8
4 Number of Residential Consumers NA1501_SCA_8
5 Number of Residential Consumers NA1501_SCA_8
6 Number of Residential Consumers NA1501_SCA_8
                                              series-description   value units
1 California Natural Gas Number of Residential Consumers (Count)    7626 COUNT
2 California Natural Gas Number of Residential Consumers (Count) 7904858 COUNT
3 California Natural Gas Number of Residential Consumers (Count) 8113034 COUNT
4 California Natural Gas Number of Residential Consumers (Count) 8313776 COUNT
5 California Natural Gas Number of Residential Consumers (Count) 8497848 COUNT
6 California Natural Gas Number of Residential Consumers (Count) 8634774 COUNT
ts1 <- ts_1 |>
dplyr::select(index = period, y = value, series_id = duoarea) |>
as_tsibble(index = index)

head(ts1)
# A tsibble: 6 x 3 [1Y]
  index       y series_id
  <int>   <int> <chr>    
1  1986    7626 SCA      
2  1987 7904858 SCA      
3  1988 8113034 SCA      
4  1989 8313776 SCA      
5  1990 8497848 SCA      
6  1991 8634774 SCA      
plot_ly(data = ts1, x = ~ index, y = ~ y, type = "scatter", mode = "lines") |>
plotly::layout(title = "Number of Natural Gas Consumers in California", 
yaxis = list(title = "Number of Consumers"),
xaxis = list(title = "Period")) 

US Monthly Demand for Natural Gas

Series details:

  • Series description: US total monthly demand for natural gas
  • Area: US
  • Process: Delivered to Consumers
  • API dashboard link
  • API path: ‘natural-gas/cons/sum/data/’
  • Query schema:
{
    "frequency": "monthly",
    "data": [
        "value"
    ],
    "facets": {
        "duoarea": [
            "NUS"
        ],
        "process": [
            "VGT"
        ]
    },
    "start": null,
    "end": null,
    "sort": [
        {
            "column": "period",
            "direction": "desc"
        }
    ],
    "offset": 0,
    "length": 5000
}

Let’s pull the data:

ts_2 <- eia_get(
        api_key = api_key,
        api_path = "natural-gas/cons/sum/data/",
        frequency = "monthly",
        data = "value",
        facets = list(duoarea = "NUS", process = "VGT")
    ) |>
    dplyr::arrange(period)

And reformat it:

ts2 <- ts_2 |>
dplyr::mutate(date = lubridate::ym(period),
index = yearmonth(date)) |>
dplyr::select(index, date, y = value, series_id = duoarea) |> 
dplyr::arrange(date) |>
as_tsibble(index = index)

And plot it:

plot_ly(data = ts2, x = ~ date, y = ~ y, type = "scatter", mode = "lines") |>
plotly::layout(title = "US Monthly Demand for Natural Gas", 
yaxis = list(title = "MMCF"),
xaxis = list(title = "Period")) 

Hourly Demand for Electricity in California by Balancing Authority Subregion PGAE

Series details:

  • Series description: Hourly demand by balancing authority subregion. Source: Form EIA-930 Product: Hourly Electric Grid Monitor
  • Balancing authority: CISO
  • Subregion: PGAE
  • API dashboard link
  • API path: electricity/rto/region-sub-ba-data/data/
  • Query schema:
{
    "frequency": "hourly",
    "data": [
        "value"
    ],
    "facets": {
        "subba": [
            "PGAE"
        ],
        "parent": [
            "CISO"
        ]
    },
    "start": null,
    "end": null,
    "sort": [
        {
            "column": "period",
            "direction": "desc"
        }
    ],
    "offset": 0,
    "length": 5000
}

Pulling the series:

ts_3 <- eia_backfill(
        api_key = api_key,
        api_path = "electricity/rto/region-sub-ba-data/data/",
        frequency = "hourly",
        data = "value",
        facets = list(subba = "PGAE", parent = "CISO"),
        start = as.POSIXct("2019-01-01"),
        end = as.POSIXct("2025-07-01"),
        offset = 1500
    )  |> dplyr::arrange(time) 

head(ts_3)
                 time subba               subba_name parent
1 2019-01-01 00:00:00  PGAE Pacific Gas and Electric   CISO
2 2019-01-01 01:00:00  PGAE Pacific Gas and Electric   CISO
3 2019-01-01 02:00:00  PGAE Pacific Gas and Electric   CISO
4 2019-01-01 03:00:00  PGAE Pacific Gas and Electric   CISO
5 2019-01-01 04:00:00  PGAE Pacific Gas and Electric   CISO
6 2019-01-01 05:00:00  PGAE Pacific Gas and Electric   CISO
                             parent_name value   value_units
1 California Independent System Operator 10440 megawatthours
2 California Independent System Operator 11256 megawatthours
3 California Independent System Operator 12501 megawatthours
4 California Independent System Operator 12513 megawatthours
5 California Independent System Operator 12094 megawatthours
6 California Independent System Operator 11604 megawatthours

Converting the series to a tsibble object:

ts3 <- ts_3 |>
dplyr::distinct() |>
dplyr::select(index = time, y = value, series_id = subba) |>
as_tsibble(index = index)

Plotting the series:

plot_ly(data = ts3, x = ~ index, y = ~ y, type = "scatter", mode = "lines") |>
plotly::layout(title = "Hourly Demand for Electricity in California Subregion PGAE", 
yaxis = list(title = "MegaWatthHours"),
xaxis = list(title = "Period"))  

Saving the Data

save(ts1, ts2, ts3, file = "./data/ts.RData")