diff --git a/crates/server/src/data_structures.rs b/crates/server/src/data_structures.rs index 2c449e2..dfbaab9 100644 --- a/crates/server/src/data_structures.rs +++ b/crates/server/src/data_structures.rs @@ -19,7 +19,7 @@ use bgp_packet::path_attributes::PathAttribute; use chrono::{DateTime, Utc}; -use crate::rib_manager::PathData; +use crate::path::path_data::PathData; /// RouteInfo encapsulates information received about a particular BGP route. #[derive(Clone, Debug)] @@ -53,6 +53,8 @@ pub enum RouteUpdate { #[derive(Debug)] pub struct RouteWithdraw { + /// The peer identifier of the peer that sent the withdrawal. pub peer_id: Ipv4Addr, + /// The prefixes which have been withdrawn. pub prefixes: Vec, } diff --git a/crates/server/src/lib.rs b/crates/server/src/lib.rs index 6f32840..516edc3 100644 --- a/crates/server/src/lib.rs +++ b/crates/server/src/lib.rs @@ -16,6 +16,7 @@ pub mod bgp_server; pub mod config; pub mod data_structures; pub mod filter_eval; +pub mod path; pub mod peer; pub mod rib_manager; pub mod route_server; diff --git a/crates/server/src/path/mod.rs b/crates/server/src/path/mod.rs new file mode 100644 index 0000000..657518e --- /dev/null +++ b/crates/server/src/path/mod.rs @@ -0,0 +1,3 @@ +/// Contains structures for working with BGP paths (data associated with a route to a prefix). +pub mod path_data; +pub mod path_set; diff --git a/crates/server/src/path/path_data.rs b/crates/server/src/path/path_data.rs new file mode 100644 index 0000000..88fcc50 --- /dev/null +++ b/crates/server/src/path/path_data.rs @@ -0,0 +1,95 @@ +use std::cmp::Ordering; + +use chrono::{DateTime, Utc}; +use serde::Serialize; + +use bgp_packet::path_attributes::{OriginPathAttribute, PathAttribute}; + +use super::path_set::PathSource; + +/// PathData is a structure to contain a specific route via one nexthop. 
+/// Note that currently there is an assumption that there is only +/// one route per peer per prefix, but when ADD-PATH support is added +/// this will no longer hold true. +#[derive(Debug, Clone, Serialize)] +pub struct PathData { + /// The origin through which this path was learned. This is set to EGP when learned from + /// another peer, set to IGP when statically configured or from another control plane. + pub origin: OriginPathAttribute, + /// The nexthop that traffic can be sent to. + pub nexthop: Vec<u8>, + /// Where this path was learned from. + pub path_source: PathSource, + /// The local pref of this path. + pub local_pref: u32, + /// The multi exit discriminator of this path. + pub med: u32, + /// The path of autonomous systems to the destination along this path. + pub as_path: Vec<u32>, + /// Path attributes received from the peer. + pub path_attributes: Vec<PathAttribute>, + /// When the path was learned. + pub learn_time: DateTime<Utc>, +} + +impl PartialOrd for PathData { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for PathData { + fn cmp(&self, other: &Self) -> Ordering { + // Less sorts first and is treated as best; higher local_pref wins, so operands are swapped. + match other.local_pref.cmp(&self.local_pref) { + Ordering::Equal => {} + ord => return ord, + } + + // Prefer paths that are locally configured (false < true, hence the swapped operands). + let self_local = matches!(self.path_source, PathSource::LocallyConfigured); + let other_local = matches!(other.path_source, PathSource::LocallyConfigured); + match other_local.cmp(&self_local) { + Ordering::Equal => {} + ord => return ord, + } + + // Compare path length. + match self.as_path.len().cmp(&other.as_path.len()) { + Ordering::Equal => {} + ord => return ord, + } + + // IGP < EGP < INCOMPLETE + match (self.origin as u8).cmp(&(other.origin as u8)) { + Ordering::Equal => {} + ord => return ord, + } + + // MED lower is better, only checked if the announcing ASN is the same. 
+ if self.as_path.last().is_some() && self.as_path.last() == other.as_path.last() { + match self.med.cmp(&other.med) { + Ordering::Equal => {} + ord => return ord, + } + } + + // As a discriminator of last resort, prefer older routes. + self.learn_time.cmp(&other.learn_time) + } +} + +impl PartialEq for PathData { + fn eq(&self, other: &Self) -> bool { + self.origin == other.origin + && self.nexthop == other.nexthop + && self.path_source == other.path_source + && self.local_pref == other.local_pref + && self.med == other.med + && self.as_path == other.as_path + && self.path_attributes == other.path_attributes + && self.learn_time == other.learn_time + } +} + +impl Eq for PathData {} diff --git a/crates/server/src/path/path_set.rs b/crates/server/src/path/path_set.rs new file mode 100644 index 0000000..1f58ba3 --- /dev/null +++ b/crates/server/src/path/path_set.rs @@ -0,0 +1,132 @@ +use std::{ + collections::{btree_set, BTreeMap, BTreeSet}, + net::Ipv4Addr, + sync::Arc, +}; + +use bgp_packet::nlri::NLRI; +use eyre::{bail, Result}; +use serde::Serialize; + +use super::path_data::PathData; + +#[derive(Debug, Clone, Serialize)] +pub enum PathSource { + LocallyConfigured, + /// BGPPeer represents a path that has been learned from a BGP peer, + /// and contains the Router ID of the peer. + BGPPeer(Ipv4Addr), +} + +impl PartialEq for PathSource { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::BGPPeer(l0), Self::BGPPeer(r0)) => l0 == r0, + _ => core::mem::discriminant(self) == core::mem::discriminant(other), + } + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct PathSet<A> { + addr: A, + prefixlen: u8, + nlri: NLRI, + /// Sorted map keyed by the BGP Identifier of the peer that sent the route. 
+ peer_paths: BTreeMap<Ipv4Addr, Arc<PathData>>, + /// Every stored path ordered by preference; the first element is treated as the best path. + paths: BTreeSet<Arc<PathData>>, +} + +impl<A> PathSet<A> { + /// Creates an empty PathSet for the given address, prefix length and NLRI. + pub fn new(addr: A, prefixlen: u8, nlri: NLRI) -> Self { + Self { + addr, + prefixlen, + nlri, + peer_paths: Default::default(), + paths: Default::default(), + } + } + + /// Returns the address this PathSet was created with. + pub fn addr(&self) -> &A { + &self.addr + } + + /// Returns the prefix length this PathSet was created with. + pub fn prefixlen(&self) -> u8 { + self.prefixlen + } + + /// Returns the NLRI this PathSet was created with. + pub fn nlri(&self) -> &NLRI { + &self.nlri + } + + /// Returns true when the set holds no paths. + pub fn is_empty(&self) -> bool { + self.paths.is_empty() + } + + /// Returns the number of paths in the set. + pub fn len(&self) -> usize { + self.paths.len() + } + + /// Returns the path most recently inserted for `announcer`, if there is one. + pub fn get_by_announcer(&self, announcer: &Ipv4Addr) -> Option<Arc<PathData>> { + self.peer_paths.get(announcer).cloned() + } + + /// Inserts a PathData from a given announcer, replacing any path it announced before. + /// Returns the new best path if the best path changed, otherwise None. + pub fn insert_pathdata( + &mut self, + announcer: &Ipv4Addr, + path_data: &Arc<PathData>, + ) -> Option<Arc<PathData>> { + let previous_best = self.paths.first().cloned(); + if let Some(existing) = self.peer_paths.get_mut(announcer) { + // Path exists already so we must first remove it from self.paths. + self.paths.remove(existing); + // Add the new path to self.paths. + self.paths.insert(path_data.clone()); + // Update it in the peer_paths map. + *existing = path_data.clone(); + } else { + // Path does not yet exist so we just add it in both structures. + self.paths.insert(path_data.clone()); + self.peer_paths.insert(*announcer, path_data.clone()); + } + let next_best = self.paths.first().cloned(); + // Only report a best path when it differs from the one held before the insert. + (previous_best != next_best).then_some(next_best).flatten() + } + + /// Removes the path announced by `announcer`; fails if that announcer has no path here. + /// Returns the new best path if the best path changed and paths remain, otherwise None. 
+ pub fn remove_pathdata( + &mut self, + announcer: &Ipv4Addr, + nlri: &NLRI, + ) -> Result<Option<Arc<PathData>>> { + let previous_best = self.paths.first().cloned(); + // One map operation both checks for and drops the announcer's entry. + if self.peer_paths.remove(announcer).is_none() { + bail!("cannot remove pathdata for NLRI {} from {}, as it is not present in PathSet.peer_paths", + nlri, announcer); + } + self.paths + .retain(|e| e.path_source != PathSource::BGPPeer(*announcer)); + let next_best = self.paths.first().cloned(); + // Only report a best path when it differs from the one held before the removal. + Ok((previous_best != next_best).then_some(next_best).flatten()) + } + + /// Iterator over the paths contained in this PathSet. + pub fn path_iter<'a>(&'a self) -> btree_set::Iter<'a, Arc<PathData>> { + self.paths.iter() + } +} diff --git a/crates/server/src/peer.rs b/crates/server/src/peer.rs index 770f964..a4437ff 100644 --- a/crates/server/src/peer.rs +++ b/crates/server/src/peer.rs @@ -16,7 +16,9 @@ use crate::config::{PeerConfig, ServerConfig}; use crate::data_structures::RouteWithdraw; use crate::data_structures::{RouteInfo, RouteUpdate}; use crate::filter_eval::FilterEvaluator; -use crate::rib_manager::{PathData, PathSource, RouteManagerCommands}; +use crate::path::path_data::PathData; +use crate::path::path_set::PathSource; +use crate::rib_manager::RouteManagerCommands; use crate::route_server::route_server::PeerStatus; use bgp_packet::capabilities::{ BGPCapability, BGPCapabilityTypeValues, BGPCapabilityValue, BGPOpenOptionTypeValues, @@ -327,9 +329,9 @@ pub struct PeerStateMachine { /// the whole structure represents ADJ-RIB-IN. prefixes_in: IpLookupTable>, - // prefixes_out contains the routes we want to export to the peer. - // TODO: Use this. - //prefixes_out: IpLookupTable, + /// prefixes_out contains all the prefixes that we know of which can + /// be sent to the peer (unfiltered). 
+ prefixes_out: IpLookupTable>, // Interface to this state machine pub iface_rx: mpsc::UnboundedReceiver, @@ -385,6 +387,7 @@ where }, })), prefixes_in: IpLookupTable::new(), + prefixes_out: IpLookupTable::new(), iface_rx, iface_tx, route_manager, diff --git a/crates/server/src/rib_manager.rs b/crates/server/src/rib_manager.rs index 0328055..d2f9521 100644 --- a/crates/server/src/rib_manager.rs +++ b/crates/server/src/rib_manager.rs @@ -14,20 +14,17 @@ use crate::config::PeerConfig; use crate::data_structures::RouteUpdate; +use crate::path::path_data::PathData; +use crate::path::path_set::PathSet; +use crate::path::path_set::PathSource; use crate::peer::PeerCommands; -use std::cmp::Eq; -use std::collections::BTreeMap; use std::collections::HashMap; use std::convert::TryInto; -use std::net::Ipv4Addr; use std::sync::Arc; use std::sync::Mutex; use bgp_packet::nlri::NLRI; -use bgp_packet::path_attributes::OriginPathAttribute; -use bgp_packet::path_attributes::PathAttribute; -use chrono::{DateTime, Utc}; use eyre::{bail, eyre}; use ip_network_table_deps_treebitmap::address::Address; use serde::Serialize; @@ -41,86 +38,6 @@ use super::data_structures::RouteWithdraw; type PeerInterface = mpsc::UnboundedSender; -#[derive(Debug, Clone, Serialize)] -pub enum PathSource { - LocallyConfigured, - /// BGPPeer represents a path that has been learned from a BGP peer, - /// and contains the Router ID of the peer. - BGPPeer(Ipv4Addr), -} - -/// PathData is a structure to contain a specific route via one nexthop. -/// Note that currently there is an assumption that there is only -/// one route per peer per prefix, but when ADD-PATH support is added -/// this will no longer hold true. -#[derive(Debug, Clone, Serialize)] -pub struct PathData { - /// The origin through which this path was learned. This is set to EGP when learned from - /// another peer, set to IGP when statically configured or from another control plane. 
- pub origin: OriginPathAttribute, - /// The nexthop that traffic can be sent to. - pub nexthop: Vec, - /// Where this path was learned from. - pub path_source: PathSource, - /// The local pref of this path. - pub local_pref: u32, - /// The multi exit discriminator of this path. - pub med: u32, - /// The path of autonomous systems to the destination along this path. - pub as_path: Vec, - /// Path attributes received from the peer. - pub path_attributes: Vec, - /// When the path was learned. - pub learn_time: DateTime, -} - -impl PartialEq for PathData { - fn eq(&self, other: &PathData) -> bool { - // Local pref. - if self.local_pref > other.local_pref { - return true; - } - - // Prefer paths that are locally originated. - if matches!(self.path_source, PathSource::LocallyConfigured) { - return true; - } - - // AS path length. - if self.as_path.len() < other.as_path.len() { - return true; - } - - // IGP < EGP < INCOMPLETE - if (self.origin as u8) < (other.origin as u8) { - return true; - } - - // MED lower is better, only checked if the announcing ASN is the same. - if let (Some(announcing_as_self), Some(announcing_as_other)) = - (self.as_path.last(), other.as_path.last()) - { - if announcing_as_self == announcing_as_other && self.med < other.med { - return true; - } - } - - // Pick the oldest path to prefer more stable ones. - self.learn_time < other.learn_time - } -} - -impl Eq for PathData {} - -#[derive(Debug, Clone, Serialize)] -pub struct PathSet { - pub addr: A, - pub prefixlen: u8, - pub nlri: NLRI, - /// Sorted map keyed by the BGP Identifier of the peer that sent the route. - pub paths: BTreeMap>, -} - /// RibSnapshot contians a version number and the dump of all the routes. 
#[derive(Debug, Serialize)] pub struct RibSnapshot { @@ -261,40 +178,35 @@ where let prefixlen = nlri.prefixlen; if let Some(path_set_wrapped) = self.rib.exact_match(addr, prefixlen.into()) { let mut path_set = path_set_wrapped.lock().unwrap(); - // There is already this prefix in the RIB, check if this is a - // reannouncement or fresh announcement. - match path_set.paths.get_mut(&peer_router_id) { - // Peer already announced this route before. - Some(existing) => { - trace!( - "Updating existing path attributes for NLRI: {}/{}", - addr, - prefixlen - ); - *existing = update.1.clone(); - } - // First time that this peer is announcing the route. - None => { - path_set.paths.insert(peer_router_id, update.1.clone()); + + if let Some(new_best) = path_set.insert_pathdata(&peer_router_id, &update.1) { + for (_config, peer) in self.peers.values() { + peer.send(PeerCommands::Announce(RouteUpdate::Announce(( + vec![nlri.clone()], + new_best.clone(), + ))))?; } } - // There is no explicit sorting and marking of the best path since - // BTreeMap is already sorted. - // Ignore errors sending due to no active receivers on the channel. let _ = self .pathset_streaming_handle .send((self.epoch, path_set.clone())); } else { // This prefix has never been seen before, so add a new PathSet for it. 
- let mut path_set = PathSet:: { - addr, - prefixlen: nlri.prefixlen, - nlri, - paths: BTreeMap::new(), - }; - path_set.paths.insert(peer_router_id, update.1.clone()); + let mut path_set = PathSet::::new(addr, nlri.prefixlen, nlri.clone()); + + if let Some(new_best) = path_set.insert_pathdata(&peer_router_id, &update.1) { + for (_config, peer) in self.peers.values() { + peer.send(PeerCommands::Announce(RouteUpdate::Announce(( + vec![nlri.clone()], + new_best.clone(), + ))))?; + } + } else { + bail!("Inconsistent state, adding new pathdata but no new best path"); + } + self.rib .insert(addr, prefixlen.into(), Mutex::new(path_set.clone())); @@ -313,19 +225,45 @@ where let mut pathset_empty = false; if let Some(path_set_wrapped) = self.rib.exact_match(addr, nlri.prefixlen.into()) { let mut path_set = path_set_wrapped.lock().unwrap(); - let removed = path_set.paths.remove(&update.peer_id); - if removed.is_none() { - warn!( - "Got a withdrawal for route {} from {}, which was not in RIB", - nlri, update.peer_id - ); + let removed = path_set.remove_pathdata(&update.peer_id, &nlri); + + match removed { + Err(_e) => { + warn!( + "Got a withdrawal for route {} from {}, which was not in RIB", + nlri, update.peer_id + ); + } + Ok(Some(new_best)) => { + // Communicate new_best to all peers. + for (_config, peer) in self.peers.values() { + peer.send(PeerCommands::Announce(RouteUpdate::Announce(( + vec![nlri.clone()], + new_best.clone(), + ))))?; + } + } + Ok(None) => { + // Do nothing here since we check below if the path is withdrawn. + } } + // Ignore errors sending due to no active receivers on the channel. 
let _ = self .pathset_streaming_handle .send((self.epoch, path_set.clone())); - if path_set.paths.is_empty() { + + if path_set.is_empty() { pathset_empty = true; + + for (_config, peer) in self.peers.values() { + peer.send(PeerCommands::Announce(RouteUpdate::Withdraw( + RouteWithdraw { + peer_id: update.peer_id, + prefixes: vec![nlri.clone()], + }, + )))?; + } } } else { warn!( @@ -333,6 +271,7 @@ where nlri, update.peer_id ); } + if pathset_empty { self.rib.remove(addr, nlri.prefixlen.into()); } @@ -402,10 +341,9 @@ mod tests { let prefixlen: u32 = 32; let lookup_result = rib_manager.lookup_path_exact(addr, prefixlen).unwrap(); - assert_eq!(lookup_result.paths.len(), 1); + assert_eq!(lookup_result.len(), 1); let path_result = lookup_result - .paths - .get(&"1.2.3.4".parse().unwrap()) + .get_by_announcer(&"1.2.3.4".parse().unwrap()) .unwrap(); assert_eq!(path_result.nexthop, nexthop.octets().to_vec()); } diff --git a/crates/server/src/route_server.rs b/crates/server/src/route_server.rs index 0ac15fa..b972498 100644 --- a/crates/server/src/route_server.rs +++ b/crates/server/src/route_server.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use crate::path::path_set::PathSet; +use crate::path::path_set::PathSource; use crate::peer::PeerCommands; -use crate::rib_manager; -use crate::rib_manager::PathSource; use crate::rib_manager::RibSnapshot; use crate::rib_manager::RouteManagerCommands; use crate::route_server::route_server::bgp_server_admin_service_server::BgpServerAdminService; @@ -23,12 +23,13 @@ use crate::route_server::route_server::AddressFamily; use crate::route_server::route_server::DumpPathsRequest; use crate::route_server::route_server::DumpPathsResponse; use crate::route_server::route_server::Path; -use crate::route_server::route_server::PathSet; use crate::route_server::route_server::Prefix; use crate::route_server::route_server::StreamPathsRequest; + use bgp_packet::constants::AddressFamilyIdentifier; use route_server::PeerStatusRequest; use route_server::PeerStatusResponse; + use std::collections::HashMap; use std::net::Ipv4Addr; use std::net::Ipv6Addr; @@ -58,10 +59,9 @@ impl RouteServer { &self, manager: UnboundedSender>, // dump_tx is used to receive the current state before streaming starts. - dump_tx: UnboundedSender<(u64, rib_manager::PathSet)>, - ) -> Result)>, Status> { - let (stream_tx, stream_rx) = - oneshot::channel::)>>(); + dump_tx: UnboundedSender<(u64, PathSet)>, + ) -> Result)>, Status> { + let (stream_tx, stream_rx) = oneshot::channel::)>>(); if let Err(e) = manager.send(RouteManagerCommands::StreamRib(dump_tx, stream_tx)) { warn!("Failed to send StreamRib command to route manager: {}", e); return Err(tonic::Status::internal( @@ -77,19 +77,19 @@ impl RouteServer { /// Converts a rib_manager::PathSet into the proto format PathSet using the /// appropriate address family. 
fn transform_pathset( - mgr_ps: (u64, rib_manager::PathSet), + mgr_ps: (u64, PathSet), address_family: i32, - ) -> PathSet { - let mut proto_pathset = PathSet { + ) -> route_server::PathSet { + let mut proto_pathset = route_server::PathSet { epoch: mgr_ps.0, prefix: Some(Prefix { - ip_prefix: mgr_ps.1.nlri.prefix, - prefix_len: mgr_ps.1.nlri.prefixlen.into(), + ip_prefix: mgr_ps.1.nlri().prefix.clone(), + prefix_len: mgr_ps.1.nlri().prefixlen.into(), address_family, }), paths: vec![], }; - for (_, path) in mgr_ps.1.paths { + for path in mgr_ps.1.path_iter() { let proto_path = Path { as_path: path.as_path.clone(), local_pref: path.local_pref, @@ -167,16 +167,16 @@ impl RouteService for RouteServer { Ok(result) => { response.epoch = result.epoch; for pathset in result.routes { - let mut proto_pathset = PathSet { + let mut proto_pathset = route_server::PathSet { epoch: result.epoch, prefix: Some(Prefix { - ip_prefix: pathset.nlri.prefix, - prefix_len: pathset.nlri.prefixlen.into(), + ip_prefix: pathset.nlri().prefix.clone(), + prefix_len: pathset.nlri().prefixlen.into(), address_family: AddressFamily::IPv4.into(), }), paths: vec![], }; - for (_, path) in pathset.paths { + for path in pathset.path_iter() { let proto_path = Path { as_path: path.as_path.clone(), local_pref: path.local_pref, @@ -214,16 +214,16 @@ impl RouteService for RouteServer { Ok(result) => { response.epoch = result.epoch; for pathset in result.routes { - let mut proto_pathset = PathSet { + let mut proto_pathset = route_server::PathSet { epoch: result.epoch, prefix: Some(Prefix { - ip_prefix: pathset.nlri.prefix, - prefix_len: pathset.nlri.prefixlen.into(), + ip_prefix: pathset.nlri().prefix.clone(), + prefix_len: pathset.nlri().prefixlen.into(), address_family: AddressFamily::IPv6.into(), }), paths: vec![], }; - for (_, path) in pathset.paths { + for path in pathset.path_iter() { let proto_path = Path { as_path: path.as_path.clone(), local_pref: path.local_pref, @@ -252,7 +252,7 @@ impl RouteService 
for RouteServer { } } - type StreamPathsStream = ReceiverStream>; + type StreamPathsStream = ReceiverStream>; async fn stream_paths( &self,