In [ ]:
# Build per-station node features: the normalised number of outgoing and
# incoming trips for every station that appears in `trips`.
#
# Both count vectors are aligned on one shared, sorted station index. Padding
# the raw groupby results with trailing zeros would shift counts onto the
# wrong station whenever a station occurs only as a start or only as an end,
# and the two feature columns would then describe different stations per row.
all_station_ids = set(trips["start_station_id"].unique()) | set(trips["end_station_id"].unique())
# NOTE(review): assumes station ids are non-null and mutually comparable
# (e.g. all ints) so that they can be sorted -- confirm against the data.
station_order = sorted(all_station_ids)


def _min_max_scale(counts):
    """Scale `counts` linearly to [0, 1]; return all zeros if the range is zero."""
    counts = np.asarray(counts, dtype=float)
    if counts.size == 0:
        return counts
    lowest = counts.min()
    spread = counts.max() - lowest
    if spread == 0:
        # Every station has the same count; avoid a 0/0 division producing NaN.
        return np.zeros_like(counts)
    return (counts - lowest) / spread


# Count non-null trip ids per station, then reindex onto the shared station
# order so that stations without any trip in that direction get an explicit 0.
outgoing_trips = (
    trips.groupby("start_station_id")["id"].count()
    .reindex(station_order, fill_value=0)
    .to_numpy()
)
incoming_trips = (
    trips.groupby("end_station_id")["id"].count()
    .reindex(station_order, fill_value=0)
    .to_numpy()
)

outgoing_trips = _min_max_scale(outgoing_trips)
incoming_trips = _min_max_scale(incoming_trips)

# One row per station (in `station_order`), columns = [outgoing, incoming].
node_features = np.stack([outgoing_trips, incoming_trips]).transpose()
print("Full shape: ", node_features.shape)
node_features[:10]
Full shape: (603, 2)
array([[0.26 , 0.26203807], [0.09647059, 0.09742441], [0.47647059, 0.44008959], [0.20941176, 0.22732363], [0.18823529, 0.16797312], [0.46588235, 0.40761478], [0.23647059, 0.17581187], [0.11058824, 0.0862262 ], [0.34705882, 0.32698768], [0.06470588, 0.06942889]])