Add cross-connection test and clean up the RPC server a bit

This commit is contained in:
Rayhaan Jaufeerally
2024-07-16 23:21:29 +00:00
parent 7a99fda7a5
commit 77919edd15
14 changed files with 192 additions and 78 deletions

View File

@ -15,6 +15,8 @@ workspace = true
bgp_packet.workspace = true
byteorder = "1.4.3"
bytes.workspace = true
chrono = "0.4.38"
eyre.workspace = true
ip_network_table-deps-treebitmap.workspace = true
log.workspace = true
nom = "7.1"

View File

@ -68,10 +68,10 @@ message PeerStatusRequest {}
// PeerStatus describes the state of a single peer. PeerStatusResponse carries
// a list of these.
//
// NOTE(review): the two time fields appear to be Unix timestamps in seconds
// (the server fills them from chrono's DateTime::timestamp()) - confirm
// before relying on the unit.
// NOTE: `last_messaage_time` is misspelled, but the generated code uses the
// name as-is, so renaming it would break existing callers.
message PeerStatus {
string peer_name = 1;
string state = 2;
uint64 session_established_time = 3;
uint64 last_messaage_time = 4;
uint64 route_updates_in = 5;
uint64 route_updates_out = 6;
optional uint64 session_established_time = 3;
optional uint64 last_messaage_time = 4;
optional uint64 route_updates_in = 5;
optional uint64 route_updates_out = 6;
}
message PeerStatusResponse { repeated PeerStatus peer_status = 1; }

View File

@ -20,6 +20,7 @@ use crate::rib_manager::RibManager;
use crate::rib_manager::RibSnapshot;
use crate::rib_manager::RouteManagerCommands;
use crate::route_server;
use crate::route_server::route_server::bgp_server_admin_service_server::BgpServerAdminServiceServer;
use crate::route_server::route_server::route_service_server::RouteServiceServer;
use bgp_packet::constants::AddressFamilyIdentifier;
use std::collections::HashMap;
@ -212,7 +213,6 @@ async fn start_http_server(
match rx.await {
Ok(resp) => {
result += &format!("Peer state: <b>{:?}</b><br/>", resp.state);
result += &format!("<code>{:?}</code>", resp.config);
}
Err(e) => {
warn!("error on rx from peer channel: {}", e);
@ -461,10 +461,12 @@ impl Server {
peer_state_machines: peer_chan_map,
};
let svc = RouteServiceServer::new(rs);
let rs_svc = RouteServiceServer::new(rs.clone());
let adm_svc = BgpServerAdminServiceServer::new(rs);
tokio::spawn(async move {
if let Err(e) = tonic::transport::Server::builder()
.add_service(svc)
.add_service(rs_svc)
.add_service(adm_svc)
.serve(addr)
.await
{

View File

@ -65,9 +65,26 @@ pub struct PrefixAnnouncement {
/// Linklocal nexthop to be used for IPv6 announcements.
pub llnh: Option<Ipv6Addr>,
// Path attributes
/// Path attributes
pub local_pref: Option<u32>,
/// Multi exit discriminator
pub med: Option<u32>,
/// Legacy communities [RFC 1997]
pub communities: Option<Vec<String>>,
/// Large communities [RFC 8092]
pub large_communities: Option<Vec<String>>,
}
impl Default for PrefixAnnouncement {
fn default() -> Self {
Self {
prefix: "::/0".to_owned(),
nexthop: IpAddr::V6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)),
llnh: Default::default(),
local_pref: Default::default(),
med: Default::default(),
communities: Default::default(),
large_communities: Default::default(),
}
}
}

View File

@ -18,6 +18,7 @@ use crate::data_structures::RouteAnnounce;
use crate::data_structures::RouteWithdraw;
use crate::data_structures::{RouteInfo, RouteUpdate};
use crate::rib_manager::RouteManagerCommands;
use crate::route_server::route_server::PeerStatus;
use bgp_packet::capabilities::{
BGPCapability, BGPCapabilityTypeValues, BGPCapabilityValue, BGPOpenOptionTypeValues,
FourByteASNCapability, MultiprotocolCapability, OpenOption, OpenOptionCapabilities,
@ -44,10 +45,13 @@ use bgp_packet::path_attributes::{
};
use bgp_packet::traits::ParserContext;
use bytes::BytesMut;
use chrono::{DateTime, NaiveDateTime, Offset, TimeZone, Utc};
use eyre::{bail, eyre};
use ip_network_table_deps_treebitmap::address::Address;
use ip_network_table_deps_treebitmap::IpLookupTable;
use std::convert::TryFrom;
use std::convert::TryInto;
use std::io::ErrorKind;
use std::net::IpAddr;
use std::net::SocketAddr;
use std::sync::Arc;
@ -73,15 +77,6 @@ type PeerInterface = mpsc::UnboundedSender<PeerCommands>;
// not be expensive, and other tasks such as picking the best route
// will be done in a different threading model.
/// PeerStatus contains the current state of the peer state machine (PSM)
/// for monitoring and debugging.
#[derive(Clone, Debug)]
pub struct PeerStatus {
/// Name of the peer.
pub name: String,
/// Configuration of the peer.
pub config: PeerConfig,
/// BGP state machine state the peer is currently in.
pub state: BGPState,
}
/// BGPState represents which state of the BGP state machine the peer
/// is currently in.
#[derive(Copy, Clone, Debug, PartialEq)]
@ -175,7 +170,7 @@ async fn run_timer(
async fn check_hold_timer(
cancel_token: CancellationToken,
iface: PeerInterface,
last_msg_time: Arc<RwLock<std::time::SystemTime>>,
last_msg_time: Arc<RwLock<DateTime<Utc>>>,
hold_time: std::time::Duration,
) {
loop {
@ -186,23 +181,16 @@ async fn check_hold_timer(
}
_ = tokio::time::sleep(std::time::Duration::from_secs(1)) => {
let last = last_msg_time.read().unwrap();
let elapsed_time = std::time::SystemTime::now().duration_since(*last);
match elapsed_time {
Ok(duration) => {
if duration > hold_time {
match iface.send(PeerCommands::TimerEvent(PeerTimerEvent::HoldTimerExpire())) {
Ok(()) => {},
Err(e) => {
warn!("Failed to send HoldTimerExpire message: {}", e);
}
}
// Exit the hold timer task since it's expired already and is not needed anymore.
return;
let elapsed_time = Utc::now() - *last;
if elapsed_time.num_seconds() as u64 > hold_time.as_secs() {
match iface.send(PeerCommands::TimerEvent(PeerTimerEvent::HoldTimerExpire())) {
Ok(()) => {},
Err(e) => {
warn!("Failed to send HoldTimerExpire message: {}", e);
}
}
Err(e) => {
warn!("Failed to check duration since last message: {}", e);
}
// Exit the hold timer task since it's expired already and is not needed anymore.
return;
}
}
@ -342,9 +330,12 @@ pub struct PeerStateMachine<A: Address> {
/// updates from the peer go to rib_in.
route_manager: mpsc::UnboundedSender<RouteManagerCommands<A>>,
// The time at which the session was established.
established_time: Option<DateTime<Utc>>,
// Keep track of the time of the last message to efficiently implement
// the hold timer.
last_msg_time: Arc<RwLock<std::time::SystemTime>>,
last_msg_time: Arc<RwLock<DateTime<Utc>>>,
// Timers and cancellation token to spawned tasks
connect_timer: Option<(JoinHandle<()>, CancellationToken)>,
@ -386,7 +377,8 @@ where
iface_rx,
iface_tx,
route_manager,
last_msg_time: Arc::new(RwLock::new(std::time::SystemTime::UNIX_EPOCH)),
established_time: None,
last_msg_time: Arc::new(RwLock::new(DateTime::from_timestamp(0, 0).unwrap())),
connect_timer: None,
hold_timer: None,
keepalive_timer: None,
@ -443,7 +435,7 @@ where
}
}
async fn handle_chan_msg(&mut self, c: PeerCommands) -> Result<(), std::io::Error> {
async fn handle_chan_msg(&mut self, c: PeerCommands) -> eyre::Result<()> {
match c {
PeerCommands::NewConnection(mut conn) => {
let peer_addr = conn.peer_addr()?;
@ -567,16 +559,14 @@ where
PeerCommands::MessageFromPeer(msg) => match self.handle_msg(msg).await {
Ok(_) => {
// Update the last time counter
// We call unwrap here because it indicates that some other thread which
// was accessing the lock had a panic.
// TODO: This should be handled more gracefully, maybe by shutting down the
// peer and starting it up again.
let mut last_time_lock = (*self.last_msg_time).write().unwrap();
*last_time_lock = std::time::SystemTime::now();
let mut last_time = self
.last_msg_time
.write()
.map_err(|e| eyre!(e.to_string()))?;
*last_time = Utc::now();
}
Err(e) => {
return Err(std::io::Error::new(std::io::ErrorKind::Other, e));
bail!(e);
}
},
PeerCommands::TimerEvent(timer_event) => match timer_event {
@ -623,9 +613,12 @@ where
},
PeerCommands::GetStatus(sender) => {
let state = PeerStatus {
name: self.config.name.clone(),
config: self.config.clone(),
state: self.state,
peer_name: self.config.name.clone(),
state: format!("{:?}", self.state),
session_established_time: self.established_time.map(|t| t.timestamp() as u64),
last_messaage_time: Some(self.last_msg_time.read().unwrap().timestamp() as u64),
route_updates_in: Some(0), /* todo */
route_updates_out: Some(0), /* todo */
};
match sender.send(state) {
Ok(()) => {}
@ -714,6 +707,7 @@ where
// Set the state machine back to the expected.
self.state = BGPState::Active;
self.established_time = None;
// Restart the connect timer to try and connect periodically.
{
@ -952,6 +946,7 @@ where
self.config.name, o.asn
);
self.state = BGPState::Active;
self.established_time = None;
if let Some(stream) = self.tcp_stream.as_mut() {
stream.shutdown().await.map_err(|e| e.to_string())?;
}
@ -1086,6 +1081,7 @@ where
BGPSubmessage::KeepaliveMessage(_) => {
// Switch the state from OpenConfirm to ESTABLISHED.
self.state = BGPState::Established;
self.established_time = Some(Utc::now());
if hold_time > 0 {
// Set keepalive timer.

View File

@ -106,9 +106,10 @@ pub enum RouteManagerCommands<A> {
pub struct RibManager<A: Address> {
mgr_rx: mpsc::UnboundedReceiver<RouteManagerCommands<A>>,
/// Peers configured on this server instance.
peers: HashMap<String, (PeerConfig, PeerInterface)>,
// We need to use a mutex for PathSet because IpLookupTable does not return a mut ptr.
rib: ip_network_table_deps_treebitmap::IpLookupTable<A, Mutex<PathSet<A>>>,
epoch: u64,

View File

@ -16,6 +16,7 @@ use crate::peer::PeerCommands;
use crate::rib_manager;
use crate::rib_manager::RibSnapshot;
use crate::rib_manager::RouteManagerCommands;
use crate::route_server::route_server::bgp_server_admin_service_server::BgpServerAdminService;
use crate::route_server::route_server::route_service_server::RouteService;
use crate::route_server::route_server::AddressFamily;
use crate::route_server::route_server::DumpPathsRequest;
@ -25,7 +26,8 @@ use crate::route_server::route_server::PathSet;
use crate::route_server::route_server::Prefix;
use crate::route_server::route_server::StreamPathsRequest;
use bgp_packet::constants::AddressFamilyIdentifier;
use log::warn;
use route_server::PeerStatusRequest;
use route_server::PeerStatusResponse;
use std::collections::HashMap;
use std::net::Ipv4Addr;
use std::net::Ipv6Addr;
@ -36,11 +38,13 @@ use tokio::sync::oneshot;
use tokio_stream::wrappers::ReceiverStream;
use tonic::Response;
use tonic::Status;
use tracing::warn;
/// Generated gRPC/protobuf bindings for the `bgpd.grpc` package, pulled in
/// via `tonic::include_proto!`.
pub mod route_server {
tonic::include_proto!("bgpd.grpc");
}
#[derive(Clone)]
pub struct RouteServer {
pub ip4_manager: UnboundedSender<RouteManagerCommands<Ipv4Addr>>,
pub ip6_manager: UnboundedSender<RouteManagerCommands<Ipv6Addr>>,
@ -98,6 +102,31 @@ impl RouteServer {
}
}
#[tonic::async_trait]
impl BgpServerAdminService for RouteServer {
    /// Collects a status snapshot from every peer state machine.
    ///
    /// Each peer is sent a `PeerCommands::GetStatus` request carrying a
    /// oneshot reply channel, and its reply is appended to the response.
    ///
    /// A peer that cannot be queried is logged and left out of the response
    /// rather than failing the whole RPC. This covers both a closed command
    /// channel and a reply sender dropped before answering, so one dead peer
    /// does not hide the status of the others.
    ///
    /// The request message carries no fields, so it is ignored.
    async fn peer_status(
        &self,
        _request: tonic::Request<PeerStatusRequest>,
    ) -> Result<Response<PeerStatusResponse>, Status> {
        let mut result = PeerStatusResponse::default();
        for (name, peer_chan) in &self.peer_state_machines {
            let (tx, rx) = oneshot::channel();
            if peer_chan.send(PeerCommands::GetStatus(tx)).is_err() {
                warn!(
                    peer = name,
                    "Peer channel dead when trying to send state request"
                );
                continue;
            }
            // The receiver only errors when the peer dropped the reply sender
            // without answering; treat that like a dead command channel.
            match rx.await {
                Ok(status) => result.peer_status.push(status),
                Err(e) => {
                    warn!(
                        peer = name,
                        error = %e,
                        "Peer dropped state request without replying"
                    );
                }
            }
        }
        Ok(Response::new(result))
    }
}
#[tonic::async_trait]
impl RouteService for RouteServer {
async fn dump_paths(