The spflow package adds three classes to the R environment. These classes make it possible to handle origin-destination flow data efficiently by exploiting the relational structure of such data.
Data on origins and destinations are stored in `spflow_network-class()` objects,
and data on the origin-destination pairs are stored in `spflow_network_pair-class()` objects.
A third object, of type `spflow_network_multi-class()`, gathers all the information
from the different data sources.
This class can be thought of as a simple relational database that ensures
origin-destination pairs are correctly matched with the node-level information.
### An example use case for the spflow network classes and model estimation
# Load the example data sets
data(
  "paris10km_municipalities",
  "paris10km_neighborhood",
  "paris10km_commuteflows"
)

# Describe the network of municipalities:
#   - the same nodes are used as origins and as destinations
#   - their neighborhood is based on contiguity
paris10km_net <- spflow_network(
  id_net = "paris10km",
  node_neighborhood = paris10km_neighborhood$by_contiguity,
  node_data = sf::st_drop_geometry(paris10km_municipalities),
  node_key_column = "ID_MUN"
)

# Describe the origin-destination pairs:
#   - holds the pairwise data (flows and distances)
#   - must be linked to an origin and a destination network
paris10km_net_pairs <- spflow_network_pair(
  id_orig_net = "paris10km",
  id_dest_net = "paris10km",
  pair_data = paris10km_commuteflows,
  orig_key_column = "ID_ORIG",
  dest_key_column = "ID_DEST"
)

# Build the spflow_network_multi object:
#   - combines the information on nodes and pairs
paris10km_multinet <- spflow_network_multi(paris10km_net, paris10km_net_pairs)
# Centered log transform: take the natural log of `x`, then subtract the
# mean of the logged values from each element.
clog <- function(x) {
  log_x <- log(x)
  centered <- log_x - mean(log_x)
  centered
}
# Model used to explain the flows:
#   - D_() contains the destination variables
#   - O_() contains the origin variables
#   - I_() contains the intra-regional variables (when origin == destination)
#   - P_() contains the pair variables (distances)
flow_formula <- log(COMMUTE_FLOW + 1) ~
  D_(log(NB_COMPANY) + clog(MED_INCOME)) +
  O_(log(POPULATION) + log(NB_COMPANY) + clog(MED_INCOME)) +
  I_(log(NB_COMPANY) + log(POPULATION)) +
  P_(log(DISTANCE + 1))

# Variables to use in the SDM specification
# (if no formula is given, all variables are used)
sdm_formula <- ~ D_(log(NB_COMPANY) + clog(MED_INCOME))

# Collect the control parameters for the estimation
estimation_control <- spflow_control(sdm_variables = sdm_formula)

# Estimate the model
# NOTE(review): the recorded output following this call reports that clog()
# could not be found -- confirm that helper functions used inside the formula
# are visible where spflow() evaluates it.
spflow(
  flow_formula,
  paris10km_multinet,
  estimation_control = estimation_control
)
#> Error in clog(MED_INCOME): could not find function "clog"