In [ ]:
# Build one graph snapshot per 60-minute window.
#
# For every window the model input is derived from the trips that ended inside
# the window, and the prediction target is the mean trip duration per
# (start station, end station) pair of the trips that ended in the following
# window. Results are accumulated in the parallel lists xs, edge_indices,
# edge_features, ys and y_indices (one entry per window).
start_date = datetime.strptime("2020-04-01 00:00:30", "%Y-%m-%d %H:%M:%S")
end_date = datetime.strptime("2020-05-01 00:00:00", "%Y-%m-%d %H:%M:%S")
interval = timedelta(minutes=60)

xs = []
edge_indices = []
ys = []
y_indices = []
edge_features = []

# Loop invariants: the column used for windowing and the group-by key.
trip_end = trips["end_date"]
station_pair = ["start_station_id", "end_station_id"]

# NOTE(review): the loop admits every window that *starts* at or before
# end_date, so the last feature window and its target window reach past
# end_date. Confirm this is intended.
while start_date <= end_date:
    current_end = start_date + interval
    subsequent_end = current_end + interval

    # Windows are half-open, [lower, upper), so a trip that ends exactly on a
    # boundary belongs to exactly one window and is never used both as an
    # input feature and as a target.
    # 0 - 60 min
    current_snapshot = trips[(trip_end >= start_date) & (trip_end < current_end)]
    # 60 - 120 min
    subsequent_snapshot = trips[(trip_end >= current_end) & (trip_end < subsequent_end)]

    # Collapse individual trips into one row per station pair by averaging
    # the remaining columns.
    current_snapshot = current_snapshot.groupby(station_pair).mean().reset_index()
    subsequent_snapshot = subsequent_snapshot.groupby(station_pair).mean().reset_index()

    # Dynamic edges (and their features) observed in the current window.
    edge_feats, additional_edge_index = extract_dynamic_edges(current_snapshot)

    # Static entries first, dynamic entries second -- in BOTH arrays, so that
    # the i-th feature row describes the i-th edge column.
    # Assumes static_edge_features rows correspond to edge_index columns and
    # edge_feats rows to additional_edge_index columns -- TODO confirm.
    extended_edge_index = np.concatenate([edge_index, additional_edge_index], axis=1)
    extended_edge_feats = np.concatenate([static_edge_features, edge_feats], axis=0)

    # Targets: mean duration per station pair in the following window, plus
    # the station pairs they belong to.
    y = subsequent_snapshot["duration"].values
    y_index = subsequent_snapshot[station_pair].values

    # The same node_features object is reused for every snapshot.
    xs.append(node_features)
    edge_indices.append(extended_edge_index)
    edge_features.append(extended_edge_feats)
    ys.append(y)
    # Transposed so the two station-id rows come first: (2, num_pairs).
    y_indices.append(y_index.transpose())

    start_date += interval