commit ff3211d1fcc2840286cefcc1463c266ccf02f4df Author: Rayhaan Jaufeerally Date: Mon Nov 22 12:23:26 2021 +0100 Re-import repository. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ff47c2d --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1f35b82 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] + +members = [ + "bgpd", + "netlink", + + # Tests + "tests/integration_tests" +] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..24023c9 --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +# BGP + +This project implements a Border Gateway Protocol speaker as defined in [RFC4271](https://datatracker.ietf.org/doc/html/rfc4271) (not specification compliant yet, see status below), and provides programmatic access to the routes learned by the server. + +The aim of the project is to provide a fully programmatic interface for configuring, filtering, and exporting routes from the server to other peers and to the forwarding plane. + +## Design + +The actual protocol interface logic (serializing bytes to objects, and objects to bytes) is contained within `bgpd/src/bgp_packet`. In there several key things are defined: + +* NLRIs - A Network Layer Reachability Information object is a fancy way to say IP prefix, and represents the bytes of the prefix and the prefix length. For example 2001:db8::/32 is `0x20 0x01 0x0d 0xb8` with a prefix length of 32. +* Path Attributes - are carried within BGP UPDATE messages and contain a wealth of metadata about a particular route, and in cases such as IPv6 announcements over MP-BGP, contain the actual prefixes and their nexthops. The parsers for path attributes are in `bgpd/src/bgp_packet/path_attributes.rs`. +* BGP Messages and the associated top-level parsing logic are contained in `bgpd/src/bgp_packet/messages.rs`.
+ +The server logic is contained in `bgpd/src/server` and is split roughly in the following way: +* `config.rs` - Defines the configuration object which is read from the configuration file on startup. The goal is to have a fully programmatic way to configure the daemon, so this is an interim state / backup mechanism while a more scalable solution is implemented. It would be nice to be able to read the state from something like etcd on startup and provide a gRPC API to modify the server configuration. +* `peer.rs` - Contains the `PeerStateMachine` object which implements a finite state machine modelling the state of a peer, and this is where all peer related events are processed (e.g. reading messages from TCPStream, parsing them, doing things with UPDATE messages etc). +* `rib_manager.rs` - Processes routes from peers and stores them in a tree-bitmap. Also exposes an API for streaming path updates to remote receivers (e.g. over gRPC). +* `route_server.rs` - Implements a gRPC service for dumping and streaming routes. + +## Project Status + +The current state of the code is a barely functional proof of concept. Rayhaan uses this daemon at home for his home network, AS210036, but apart from the basic functionality of connecting to a peer, announcing a static set of routes, and streaming received routes out via gRPC, it does not do much more. + +There are an abundance of opportunities to contribute to the project, to make it fully standards compliant, and achieve the goals of full programmability. If you are interested please reach out to `rayhaan (at rayhaan (with ccTLD ch))`. + +### 🔥 P0 + +* Implementation of `route_client` to be able to properly install routes into the kernel. +* Route filters for inbound routes. +* Forwarding routes to peers. 
+ +### 🕯️ P1 + +* Monitoring and status of sessions with peers (to be able to detect peer down etc) +* More comprehensive integration tests (to cover route acceptance / filters / propagation to RIB / forwarding / availability in the API etc). + +### RFCs + +The following are the RFCs that were consulted during the writing of the daemon so far, and there are certainly parts that are not yet covered, so this list will have to be revisited to check conformance / file bugs to track where the gaps are. + +* RFC4271 - https://datatracker.ietf.org/doc/html/rfc4271 + - BGP4 specification +* RFC4760 - https://datatracker.ietf.org/doc/html/rfc4760 + - Multiprotocol extensions for BGP4 +* RFC6793 - https://datatracker.ietf.org/doc/html/rfc6793 + - 4 byte ASNs + +#### RFC repository + +Here's a list of interesting RFCs that we should look at eventually: + +* RFC8212 - https://datatracker.ietf.org/doc/html/rfc8212 + - Default External BGP (EBGP) Route Propagation Behavior without Policies + +### TODOs + +A very rough sketch of some major TODOs: + +* Get `route_client` into a state where it actually works +* Support forwarding routes to other peers +* Implement filters comprehensively and design an API for setting them +* Implement programmatic control plane via gRPC +* RPKI integration +* Design and implement monitoring interfaces + diff --git a/bgpd/Cargo.toml b/bgpd/Cargo.toml new file mode 100644 index 0000000..599ca9c --- /dev/null +++ b/bgpd/Cargo.toml @@ -0,0 +1,50 @@ +[package] +name = "bgpd" +version = "0.1.0" +authors = ["Rayhaan Jaufeerally "] +edition = "2021" + +[[bin]] +name = "bgp_server" +path = "src/main.rs" + +[[bin]] +name = "route_client" +path = "src/route_client/main.rs" + +[[bin]] +name = "streamer_cli" +path = "src/streamer_cli/main.rs" + +[dependencies] +anyhow = "1.0.56" +byteorder = "1.4.3" +bytes = "1.*" +clap = {version = "3.2.8", features = ["cargo", "derive"]} +futures = "0.3" +ipnet = "2.3.0" +libc = "0.2.126" +log = "0.4" +nom = "6.1.2" +prost =
"0.8" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0.64" +signal-hook = { version = "0.3.10", features = ["extended-siginfo"] } +signal-hook-tokio = "0.3.0" +stderrlog = "0.5.1" +tokio = { version = "1.13.0", features = ["full"] } +tokio-stream = { version = "0.1.7", features = ["net"] } +tokio-util = { version = "0.6.7", features = ["codec"] } +tonic = { version = "0.5", features = ["compression"] } +tracing = "0.1" +tracing-subscriber = "0.2" +ip_network_table-deps-treebitmap = "0.5.0" +warp = "0.3" +rtnetlink = "0.9.1" +netlink-packet-route = "0.11.0" +netlink = { path = "../netlink" } +neli = "0.6.2" +async-trait = "0.1.57" + +[build-dependencies] +tonic-build = { version = "0.5.1", features = ["prost", "compression"] } diff --git a/bgpd/build.rs b/bgpd/build.rs new file mode 100644 index 0000000..9945377 --- /dev/null +++ b/bgpd/build.rs @@ -0,0 +1,19 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +fn main() { + tonic_build::configure() + .compile(&["proto/route_service.proto"], &["proto"]) + .unwrap(); +} diff --git a/bgpd/proto/route_service.proto b/bgpd/proto/route_service.proto new file mode 100644 index 0000000..5f4606c --- /dev/null +++ b/bgpd/proto/route_service.proto @@ -0,0 +1,83 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package bgpd.grpc; + +enum AddressFamily { + UNKNOWN = 0; + IPv4 = 1; + IPv6 = 2; +} + +message Prefix { + bytes ip_prefix = 1; + int32 prefix_len = 2; + AddressFamily address_family = 3; +} + +// Path represents the metadata associated with the route to a particular +// prefix. +message Path { + bytes nexthop = 1; + string peer_name = 2; + uint32 local_pref = 3; + uint32 med = 4; + repeated uint32 as_path = 5; + // TODO: Path attributes. Not yet supported because we need to generate proto + // definitions for all of them. +} + +message PathSet { + uint64 epoch = 1; + Prefix prefix = 2; + repeated Path paths = 3; +} + +message StreamPathsRequest { AddressFamily address_family = 1; } + +message DumpPathsRequest { AddressFamily address_family = 1; }; + +message DumpPathsResponse { + uint64 epoch = 1; + repeated PathSet path_sets = 2; +}; + +service RouteService { + // DumpPaths returns all the paths currently in the RIB. + rpc DumpPaths(DumpPathsRequest) returns (DumpPathsResponse); + // StreamPaths dumps the existing routes and starts streaming updates to the + // RIB. 
+ rpc StreamPaths(StreamPathsRequest) returns (stream PathSet); +} + +message PeerStatusRequest {} + +message PeerStatus { + string peer_name = 1; + string state = 2; + uint64 session_established_time = 3; + uint64 last_messaage_time = 4; + uint64 route_updates_in = 5; + uint64 route_updates_out = 6; +} + +message PeerStatusResponse { repeated PeerStatus peer_status = 1; } + +// BGPServerAdminService implements an administrative interface to +// view the status and control the operation of this BGP server. +service BGPServerAdminService { + rpc PeerStatus(PeerStatusRequest) returns (PeerStatusResponse); +} \ No newline at end of file diff --git a/bgpd/rustfmt.toml b/bgpd/rustfmt.toml new file mode 100644 index 0000000..c51666e --- /dev/null +++ b/bgpd/rustfmt.toml @@ -0,0 +1 @@ +edition = "2018" \ No newline at end of file diff --git a/bgpd/src/bgp_packet/capabilities.rs b/bgpd/src/bgp_packet/capabilities.rs new file mode 100644 index 0000000..e319a36 --- /dev/null +++ b/bgpd/src/bgp_packet/capabilities.rs @@ -0,0 +1,660 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use crate::bgp_packet::constants::SubsequentAddressFamilyIdentifier; +use crate::bgp_packet::traits::BGPParserError; +use crate::bgp_packet::traits::ParserContext; +use crate::bgp_packet::traits::ReadablePacket; +use crate::bgp_packet::traits::WritablePacket; +use byteorder::{ByteOrder, NetworkEndian}; +use nom::number::complete::{be_u16, be_u8}; +use nom::Err::Failure; +use nom::IResult; +use std::fmt; + +/// BGPOpenOptionType represents the option types in the Open message. +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug, Hash)] +pub struct BGPOpenOptionType(pub u8); + +impl BGPOpenOptionType { + pub fn new(val: u8) -> BGPOpenOptionType { + BGPOpenOptionType(val) + } +} + +impl Into for BGPOpenOptionType { + fn into(self) -> u8 { + self.0 + } +} + +#[allow(non_snake_case)] +#[allow(non_upper_case_globals)] +pub mod BGPOpenOptionTypeValues { + use super::BGPOpenOptionType; + + pub const CAPABILITIES: BGPOpenOptionType = BGPOpenOptionType(2); +} + +/// OpenOptionValue represents something which can be in the payload of OpenOption. +trait OpenOptionValue: ReadablePacket + WritablePacket + fmt::Debug {} + +#[derive(Debug, PartialEq)] +pub struct OpenOption { + pub option_type: BGPOpenOptionType, + pub oval: OpenOptions, +} + +impl ReadablePacket for OpenOption { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], OpenOption, BGPParserError<&'a [u8]>> { + let (buf, typ) = nom::combinator::complete(be_u8)(buf)?; + let (buf, val) = match BGPOpenOptionType(typ) { + BGPOpenOptionTypeValues::CAPABILITIES => { + let (b, cap) = OpenOptionCapabilities::from_wire(ctx, buf)?; + (b, OpenOptions::Capabilities(cap)) + } + _ => { + // TODO: This should gracefully degrade and not fail the parser.
+ return Err(Failure(BGPParserError::CustomText( + "Unknown BGP OPEN option".to_string(), + ))); + } + }; + IResult::Ok(( + buf, + OpenOption { + option_type: BGPOpenOptionType(typ), + oval: val, + }, + )) + } +} + +impl WritablePacket for OpenOption { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf = Vec::new(); + match &self.oval { + OpenOptions::Capabilities(c) => { + buf.push(BGPOpenOptionTypeValues::CAPABILITIES.into()); + buf.append(&mut c.to_wire(ctx)?); + } + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + match &self.oval { + OpenOptions::Capabilities(c) => { + return Ok(2 + c.wire_len(ctx)?); + } + } + } +} + +impl fmt::Display for OpenOption { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "OpenOption: {}", self.oval) + } +} + +#[derive(Debug, PartialEq)] +pub enum OpenOptions { + Capabilities(OpenOptionCapabilities), +} + +impl fmt::Display for OpenOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + return match &self { + OpenOptions::Capabilities(c) => write!(f, "Capabilities: {}", c), + }; + } +} + +/// OpenOptionCapabilities represents a list of capabilities which can be present in an OpenOption. +#[derive(Debug, PartialEq)] +pub struct OpenOptionCapabilities { + pub caps: Vec, +} + +impl ReadablePacket for OpenOptionCapabilities { + // from_wire reads the length and value of the TLV. + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], OpenOptionCapabilities, BGPParserError<&'a [u8]>> { + let (buf, caps): (_, Vec) = nom::multi::length_value( + be_u8, + nom::multi::many0(|i| BGPCapability::from_wire(ctx, i)), + )(buf)?; + return IResult::Ok((buf, OpenOptionCapabilities { caps })); + } +} + +impl WritablePacket for OpenOptionCapabilities { + // to_wire writes the length and value of the TLV.
+ fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = Vec::new(); + buf.push(self.wire_len(ctx).unwrap() as u8); + for cap in &self.caps { + let mut result: Vec = (*cap).to_wire(ctx)?; + buf.append(&mut result); + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut ttl: u16 = 0; + for cap in &self.caps { + ttl += (*cap).wire_len(ctx)?; + } + Ok(ttl) + } +} + +impl fmt::Display for OpenOptionCapabilities { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Capabilities: [")?; + for cap in &self.caps { + std::fmt::Display::fmt(cap, f)?; + } + write!(f, "]") + } +} + +/// BGP Capabilities. +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug, Hash)] +pub struct BGPCapabilityType(pub u8); + +impl BGPCapabilityType { + pub fn new(val: u8) -> BGPCapabilityType { + BGPCapabilityType(val) + } +} + +impl Into for BGPCapabilityType { + fn into(self) -> u8 { + return self.0; + } +} + +#[allow(non_snake_case)] +#[allow(non_upper_case_globals)] +pub mod BGPCapabilityTypeValues { + use super::BGPCapabilityType; + + /// Multiprotocol Extensions for BGP-4 [RFC2858] + pub const MULTPROTOCOL_BGP4: BGPCapabilityType = BGPCapabilityType(1); + /// Route Refresh Capability for BGP-4 [RFC2918] + pub const ROUTE_REFRESH_BGP4: BGPCapabilityType = BGPCapabilityType(2); + /// Outbound Route Filtering Capability [RFC5291] + pub const OUTBOUND_ROUTE_FILTERING: BGPCapabilityType = BGPCapabilityType(3); + /// Extended Next Hop Encoding [RFC5549] + pub const EXTENDED_NEXT_HOP: BGPCapabilityType = BGPCapabilityType(5); + /// BGP Extended Message [RFC8654] + pub const EXTENDED_MESSAGE: BGPCapabilityType = BGPCapabilityType(6); + /// BGPsec Capability [RFC8205] + pub const BGPSEC: BGPCapabilityType = BGPCapabilityType(7); + /// Multiple Labels Capability [RFC8277] + pub const MULTILABEL_COMPAT: BGPCapabilityType = BGPCapabilityType(8); + + /// Graceful Restart Capability [RFC4724] + pub const 
GRACEFUL_RESTART: BGPCapabilityType = BGPCapabilityType(64); + /// Support for 4-octet AS number capability [RFC6793] + pub const FOUR_BYTE_ASN: BGPCapabilityType = BGPCapabilityType(65); + /// ADD-PATH Capability [RFC7911] + pub const ADD_PATH: BGPCapabilityType = BGPCapabilityType(69); + /// Enhanced Route Refresh Capability [RFC7313] + pub const ENHANCED_ROUTE_REFRESH: BGPCapabilityType = BGPCapabilityType(70); +} + +#[derive(Debug, PartialEq)] +pub struct BGPCapability { + pub cap_type: BGPCapabilityType, + pub val: BGPCapabilityValue, +} + +impl ReadablePacket for BGPCapability { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], BGPCapability, BGPParserError<&'a [u8]>> { + let (buf, cap_type) = nom::combinator::peek(be_u8)(buf)?; // Peek the type, if we know it, consume. + let (buf, val): (_, BGPCapabilityValue) = + match BGPCapabilityType(cap_type) { + BGPCapabilityTypeValues::FOUR_BYTE_ASN => { + let (buf, _) = be_u8(buf)?; // Consume type + let (buf, cap) = nom::multi::length_value(be_u8, |i| { + FourByteASNCapability::from_wire(ctx, i) + })(buf)?; + (buf, BGPCapabilityValue::FourByteASN(cap)) + } + BGPCapabilityTypeValues::MULTPROTOCOL_BGP4 => { + let (buf, _) = be_u8(buf)?; + let (buf, cap) = nom::multi::length_value(be_u8, |i| { + MultiprotocolCapability::from_wire(ctx, i) + })(buf)?; + (buf, BGPCapabilityValue::Multiprotocol(cap)) + } + BGPCapabilityTypeValues::ROUTE_REFRESH_BGP4 => { + let (buf, _) = be_u8(buf)?; + let (buf, cap) = nom::multi::length_value(be_u8, |i| { + RouteRefreshCapability::from_wire(ctx, i) + })(buf)?; + (buf, BGPCapabilityValue::RouteRefresh(cap)) + } + BGPCapabilityTypeValues::GRACEFUL_RESTART => { + let (buf, _) = be_u8(buf)?; + let (buf, cap) = nom::multi::length_value(be_u8, |i| { + GracefulRestartCapability::from_wire(ctx, i) + })(buf)?; + (buf, BGPCapabilityValue::GracefulRestart(cap)) + } + _ => { + // If we do not know what this is, then put the bytes in an UnknownCapability. 
+ let (buf, cap) = UnknownCapability::from_wire(ctx, buf)?; + (buf, BGPCapabilityValue::UnknownCapability(cap)) + } + }; + IResult::Ok(( + buf, + BGPCapability { + cap_type: BGPCapabilityType(cap_type), + val, + }, + )) + } +} + +impl WritablePacket for BGPCapability { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = vec![]; + buf.push(self.cap_type.into()); + match &self.val { + BGPCapabilityValue::FourByteASN(v) => { + buf.push(v.wire_len(ctx)? as u8); + buf.extend_from_slice(&v.to_wire(ctx)?); + } + BGPCapabilityValue::Multiprotocol(v) => { + buf.push(v.wire_len(ctx)? as u8); + buf.extend_from_slice(&v.to_wire(ctx)?); + } + BGPCapabilityValue::RouteRefresh(v) => { + buf.push(v.wire_len(ctx)? as u8); + buf.extend_from_slice(&v.to_wire(ctx)?); + } + BGPCapabilityValue::GracefulRestart(v) => { + buf.push(v.wire_len(ctx)? as u8); + buf.extend_from_slice(&v.to_wire(ctx)?); + } + BGPCapabilityValue::UnknownCapability(v) => { + buf.push(v.wire_len(ctx)? 
as u8); + buf.extend_from_slice(&v.to_wire(ctx)?); + } + }; + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + // BGPCapabilityType(u8) + cap_len(u8) + val + return match &self.val { + BGPCapabilityValue::FourByteASN(v) => Ok(2 + v.wire_len(ctx)?), + BGPCapabilityValue::Multiprotocol(v) => Ok(2 + v.wire_len(ctx)?), + BGPCapabilityValue::RouteRefresh(v) => Ok(2 + v.wire_len(ctx)?), + BGPCapabilityValue::GracefulRestart(v) => Ok(2 + v.wire_len(ctx)?), + BGPCapabilityValue::UnknownCapability(v) => Ok(2 + v.wire_len(ctx)?), + }; + } +} + +impl fmt::Display for BGPCapability { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + std::fmt::Display::fmt(&self.val, f) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub enum BGPCapabilityValue { + FourByteASN(FourByteASNCapability), + Multiprotocol(MultiprotocolCapability), + RouteRefresh(RouteRefreshCapability), + GracefulRestart(GracefulRestartCapability), + UnknownCapability(UnknownCapability), +} + +impl fmt::Display for BGPCapabilityValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + BGPCapabilityValue::FourByteASN(v) => std::fmt::Display::fmt(v, f), + BGPCapabilityValue::Multiprotocol(v) => std::fmt::Display::fmt(v, f), + BGPCapabilityValue::RouteRefresh(v) => std::fmt::Display::fmt(v, f), + BGPCapabilityValue::GracefulRestart(v) => std::fmt::Display::fmt(v, f), + BGPCapabilityValue::UnknownCapability(v) => std::fmt::Display::fmt(v, f), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct UnknownCapability { + cap_code: u8, + payload: Vec, +} + +impl ReadablePacket for UnknownCapability { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, typ) = be_u8(buf)?; + let (buf, len) = be_u8(buf)?; + let (buf, payload) = nom::bytes::complete::take(len)(buf)?; + Ok(( + buf, + UnknownCapability { + cap_code: typ, + payload: payload.to_vec(), + }, + )) + } +} + +impl WritablePacket 
for UnknownCapability { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf = vec![]; + // Only the payload is emitted here: the capability type and length octets are written by the enclosing capability encoder. + buf.extend(self.payload.to_owned()); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(self.payload.len() as u16) + } +} + +impl fmt::Display for UnknownCapability { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "UnknownCapability type: {}", self.cap_code) + } +} + +/// FourByteASNCapability holds the 32-bit AS number carried as the value of the four-octet ASN capability. +#[derive(Clone, Debug, PartialEq)] +pub struct FourByteASNCapability { + pub asn: u32, +} + +impl FourByteASNCapability { + fn new(asn: u32) -> FourByteASNCapability { + FourByteASNCapability { asn } + } +} + +impl ReadablePacket for FourByteASNCapability { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, asn) = nom::combinator::complete(nom::number::complete::be_u32)(buf)?; + return IResult::Ok((buf, FourByteASNCapability::new(asn))); + } +} + +impl WritablePacket for FourByteASNCapability { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = vec![0; 4]; + byteorder::NetworkEndian::write_u32(&mut buf, self.asn); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl fmt::Display for FourByteASNCapability { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "FourByteASN: asn: {}", self.asn) + } +} + +/// MultiprotocolCapability represents support for RFC 4760 for a single (AFI, SAFI) pair.
+#[derive(Clone, Debug, PartialEq)] +pub struct MultiprotocolCapability { + pub afi: AddressFamilyIdentifier, + pub safi: SubsequentAddressFamilyIdentifier, +} + +impl MultiprotocolCapability { + fn new( + afi: AddressFamilyIdentifier, + safi: SubsequentAddressFamilyIdentifier, + ) -> MultiprotocolCapability { + MultiprotocolCapability { afi, safi } + } +} + +impl ReadablePacket for MultiprotocolCapability { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], MultiprotocolCapability, BGPParserError<&'a [u8]>> { + let (buf, (afi_raw, _, safi_raw)) = nom::combinator::complete(nom::sequence::tuple(( + |i| AddressFamilyIdentifier::from_wire(ctx, i), + nom::bytes::complete::take(1u8), + |i| SubsequentAddressFamilyIdentifier::from_wire(ctx, i), + )))(buf)?; + + let afi = AddressFamilyIdentifier::try_from(afi_raw) + .map_err(|e| nom::Err::Error(BGPParserError::CustomText(e.to_string())))?; + let safi = SubsequentAddressFamilyIdentifier::try_from(safi_raw) + .map_err(|e| nom::Err::Error(BGPParserError::CustomText(e.to_string())))?; + + IResult::Ok((buf, MultiprotocolCapability::new(afi, safi))) + } +} + +impl WritablePacket for MultiprotocolCapability { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + // [ AFI: uint16, 0: uint8, SAFI: uint8 ] + let mut res = [0u8; 4]; + byteorder::NetworkEndian::write_u16(&mut res[..2], self.afi.into()); + res[3] = self.safi.into(); + Ok(res.to_vec()) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl fmt::Display for MultiprotocolCapability { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "MultiprotocolCapbility: [ {} {} ]", self.afi, self.safi,) + } +} + +// Route refresh capability +#[derive(Clone, Debug, PartialEq)] +pub struct RouteRefreshCapability {} + +impl WritablePacket for RouteRefreshCapability { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + Ok(vec![]) + } + fn wire_len(&self, _: &ParserContext) -> 
Result { + Ok(0) + } +} + +impl ReadablePacket for RouteRefreshCapability { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], RouteRefreshCapability, BGPParserError<&'a [u8]>> { + IResult::Ok((buf, RouteRefreshCapability {})) + } +} + +impl fmt::Display for RouteRefreshCapability { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "RouteRefreshCapability") + } +} + +// Graceful restart capability +#[derive(Clone, Debug, PartialEq)] +pub struct GracefulRestartCapability { + pub restart_state: bool, // 4 bits total, most sig bit here, rest reserved. + pub restart_time_sec: u16, // 12 bits. + pub payloads: Vec, +} + +// GracefulRestartPayload represents the contents of the graceful restart cap. +#[derive(Clone, Debug, PartialEq)] +pub struct GracefulRestartPayload { + pub afi: AddressFamilyIdentifier, + pub safi: SubsequentAddressFamilyIdentifier, + pub af_flags: bool, // 8 bits total, most significant bit used here. +} + +impl ReadablePacket for GracefulRestartPayload { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], GracefulRestartPayload, BGPParserError<&'a [u8]>> { + let (buf, (afi, safi, flags)) = nom::combinator::complete(nom::sequence::tuple(( + |i| AddressFamilyIdentifier::from_wire(ctx, i), + |i| SubsequentAddressFamilyIdentifier::from_wire(ctx, i), + be_u8, + )))(buf)?; + IResult::Ok(( + buf, + GracefulRestartPayload { + afi, + safi, + af_flags: (0x80 & flags) != 0, + }, + )) + } +} + +impl WritablePacket for GracefulRestartPayload { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let afi: u16 = self.afi.into(); + let mut res = vec![0u8; 2]; + byteorder::NetworkEndian::write_u16(res.as_mut(), afi.into()); + res.push(self.safi.into()); + res.push(if self.af_flags { 0x80 } else { 0 }); + Ok(res) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl fmt::Display for GracefulRestartPayload { + fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "GracefulRestartPayload: [afi:{} safi:{} af_flags:{}]", + self.afi, self.safi, self.af_flags + ) + } +} + +impl ReadablePacket for GracefulRestartCapability { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, state_rt) = nom::combinator::complete(be_u16)(buf)?; + let (buf, payloads): (_, Vec) = + nom::multi::many0(|i| GracefulRestartPayload::from_wire(ctx, i))(buf)?; + let restart_time_sec: u16 = 0x0fff & state_rt; // Lower 12 bits (mask 0x0fff). + let restart_state: bool = (0x8000 & state_rt) != 0; // Highest bit (mask 0x8000). + IResult::Ok(( + buf, + GracefulRestartCapability { + restart_state, + restart_time_sec, + payloads, + }, + )) + } +} + +impl WritablePacket for GracefulRestartCapability { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = vec![0u8; 2]; + let state_rt: u16 = ((self.restart_state as u16) << 15) | (0xfff & self.restart_time_sec); + NetworkEndian::write_u16(&mut buf, state_rt); + for item in &self.payloads { + buf.append(&mut item.to_wire(ctx)?); + } + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok((2 + self.payloads.len() * 4) as u16) + } +} + +impl fmt::Display for GracefulRestartCapability { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "GracefulRestartCapability: [")?; + for value in &self.payloads { + fmt::Display::fmt(value, f)?; + } + write!(f, " ]") + } +} + +#[cfg(test)] +mod tests { + + use super::BGPCapability; + use super::BGPCapabilityTypeValues; + use super::BGPCapabilityValue; + use super::FourByteASNCapability; + use super::OpenOption; + use crate::bgp_packet::constants::AddressFamilyIdentifier::Ipv6; + use crate::bgp_packet::traits::ParserContext; + use crate::bgp_packet::traits::ReadablePacket; + + #[test] + fn test_four_byte_asn_capability() { + let bytes: &[u8] = &[0x41, 0x04, 0x00, 0x00, 0x00, 0x2a]; + let ctx =
&ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let (buf, result) = BGPCapability::from_wire(ctx, bytes).unwrap(); + assert_eq!( + result, + BGPCapability { + cap_type: BGPCapabilityTypeValues::FOUR_BYTE_ASN, + val: BGPCapabilityValue::FourByteASN(FourByteASNCapability { asn: 42 }) + } + ); + assert_eq!(buf.len(), 0); + } + + #[test] + fn test_open_options<'a>() { + let option_bytes: &[u8] = &[ + 0x02, 0x06, 0x01, 0x04, 0x00, 0x01, 0x00, 0x01, 0x02, 0x02, 0x80, 0x00, 0x02, 0x02, + 0x02, 0x00, 0x02, 0x02, 0x46, 0x00, 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x00, 0x2a, + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let (_buf, result) = + nom::multi::many0(|buf: &'a [u8]| OpenOption::from_wire(ctx, buf))(option_bytes) + .unwrap(); + + let expected_str = "[OpenOption { option_type: BGPOpenOptionType(2), oval: Capabilities(OpenOptionCapabilities { caps: [BGPCapability { cap_type: BGPCapabilityType(1), val: Multiprotocol(MultiprotocolCapability { afi: Ipv4, safi: Unicast }) }] }) }, OpenOption { option_type: BGPOpenOptionType(2), oval: Capabilities(OpenOptionCapabilities { caps: [BGPCapability { cap_type: BGPCapabilityType(128), val: UnknownCapability(UnknownCapability { cap_code: 128, payload: [] }) }] }) }, OpenOption { option_type: BGPOpenOptionType(2), oval: Capabilities(OpenOptionCapabilities { caps: [BGPCapability { cap_type: BGPCapabilityType(2), val: RouteRefresh(RouteRefreshCapability) }] }) }, OpenOption { option_type: BGPOpenOptionType(2), oval: Capabilities(OpenOptionCapabilities { caps: [BGPCapability { cap_type: BGPCapabilityType(70), val: UnknownCapability(UnknownCapability { cap_code: 70, payload: [] }) }] }) }, OpenOption { option_type: BGPOpenOptionType(2), oval: Capabilities(OpenOptionCapabilities { caps: [BGPCapability { cap_type: BGPCapabilityType(65), val: FourByteASN(FourByteASNCapability { asn: 42 }) }] }) }]"; + assert_eq!(format!("{:?}", result), expected_str); + } +} diff --git 
a/bgpd/src/bgp_packet/constants.rs b/bgpd/src/bgp_packet/constants.rs new file mode 100644 index 0000000..d88947e --- /dev/null +++ b/bgpd/src/bgp_packet/constants.rs @@ -0,0 +1,132 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use nom::IResult; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::io::ErrorKind; + +use super::traits::{BGPParserError, ParserContext, ReadablePacket}; + +// Address Family Identifiers as per +// https://www.iana.org/assignments/address-family-numbers/address-family-numbers.xhtml +#[derive(Eq, PartialEq, Debug, Copy, Clone, Serialize, Deserialize, Hash)] +pub enum AddressFamilyIdentifier { + Ipv4, + Ipv6, +} + +impl Into for AddressFamilyIdentifier { + fn into(self) -> u16 { + match self { + Self::Ipv4 => 1, + Self::Ipv6 => 2, + } + } +} + +impl TryFrom for AddressFamilyIdentifier { + type Error = std::io::Error; + fn try_from(i: u16) -> Result { + match i { + 1 => Ok(Self::Ipv4), + 2 => Ok(Self::Ipv6), + _ => Err(std::io::Error::new( + ErrorKind::InvalidInput, + format!("Unknown AFI: {}", i), + )), + } + } +} + +/// This parser for AFI makes it easier to write the other message parsers. 
+impl ReadablePacket for AddressFamilyIdentifier { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], AddressFamilyIdentifier, BGPParserError<&'a [u8]>> { + let (buf, afi_raw) = nom::number::complete::be_u16(buf)?; + + let afi = AddressFamilyIdentifier::try_from(afi_raw) + .map_err(|e| nom::Err::Error(BGPParserError::CustomText(e.to_string())))?; + + IResult::Ok((buf, afi)) + } +} + +impl fmt::Display for AddressFamilyIdentifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Ipv4 => write!(f, "Ipv4"), + Self::Ipv6 => write!(f, "Ipv6"), + } + } +} + +// Subsequent Address Family Identifiers as per +// https://www.iana.org/assignments/safi-namespace/safi-namespace.xhtml +#[derive(Eq, PartialEq, Debug, Copy, Clone, Serialize, Deserialize)] +pub enum SubsequentAddressFamilyIdentifier { + Unicast, + Multicast, +} + +impl Into for SubsequentAddressFamilyIdentifier { + fn into(self) -> u8 { + match self { + Self::Unicast => 1, + Self::Multicast => 2, + } + } +} + +impl TryFrom for SubsequentAddressFamilyIdentifier { + type Error = std::io::Error; + fn try_from(i: u8) -> Result { + match i { + 1 => Ok(Self::Unicast), + 2 => Ok(Self::Multicast), + _ => Err(std::io::Error::new( + ErrorKind::InvalidInput, + format!("Unknown SAFI value: {} ", i), + )), + } + } +} + +/// This parser for SAFI makes it easier to write the other message parsers. 
+impl ReadablePacket for SubsequentAddressFamilyIdentifier { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], SubsequentAddressFamilyIdentifier, BGPParserError<&'a [u8]>> { + let (buf, safi_raw) = nom::number::complete::be_u8(buf)?; + + let safi = SubsequentAddressFamilyIdentifier::try_from(safi_raw) + .map_err(|e| nom::Err::Error(BGPParserError::CustomText(e.to_string())))?; + + IResult::Ok((buf, safi)) + } +} + +impl fmt::Display for SubsequentAddressFamilyIdentifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Unicast => write!(f, "Unicast"), + Self::Multicast => write!(f, "Multicast"), + } + } +} + +pub const AS_TRANS: u16 = 23456; diff --git a/bgpd/src/bgp_packet/messages.rs b/bgpd/src/bgp_packet/messages.rs new file mode 100644 index 0000000..0dc5910 --- /dev/null +++ b/bgpd/src/bgp_packet/messages.rs @@ -0,0 +1,710 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::bgp_packet::capabilities::OpenOption; +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use crate::bgp_packet::constants::SubsequentAddressFamilyIdentifier; +use crate::bgp_packet::nlri::NLRI; +use crate::bgp_packet::path_attributes::PathAttribute; +use crate::bgp_packet::traits::BGPParserError; +use crate::bgp_packet::traits::ParserContext; +use crate::bgp_packet::traits::ReadablePacket; +use crate::bgp_packet::traits::WritablePacket; +use byteorder::{ByteOrder, NetworkEndian}; +use bytes::Buf; +use bytes::BufMut; +use bytes::BytesMut; +use nom::number::complete::{be_u16, be_u32, be_u8}; +use nom::Err::Failure; +use nom::IResult; +use std::convert::TryInto; +use std::fmt; +use std::fmt::Display; +use std::net::Ipv4Addr; +use tokio_util::codec::{Decoder, Encoder}; + +/// BGPMessageType represents the type of the top level BGP message. +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug, Hash)] +pub struct BGPMessageType(pub u8); + +impl BGPMessageType { + pub fn new(val: u8) -> BGPMessageType { + BGPMessageType(val) + } +} + +impl Into for BGPMessageType { + fn into(self) -> u8 { + self.0 + } +} +impl From for BGPMessageType { + fn from(i: u8) -> BGPMessageType { + BGPMessageType(i) + } +} + +#[allow(non_snake_case)] +#[allow(non_upper_case_globals)] +pub mod BGPMessageTypeValues { + use super::BGPMessageType; + + pub const OPEN_MESSAGE: BGPMessageType = BGPMessageType(1); + pub const UPDATE_MESSAGE: BGPMessageType = BGPMessageType(2); + pub const NOTIFICATION_MESSAGE: BGPMessageType = BGPMessageType(3); + pub const KEEPALIVE_MESSAGE: BGPMessageType = BGPMessageType(4); + pub const REFRESH_MESSAGE: BGPMessageType = BGPMessageType(5); +} + +#[derive(Debug, PartialEq)] +pub enum BGPSubmessage { + OpenMessage(OpenMessage), + UpdateMessage(UpdateMessage), + NotificationMessage(NotificationMessage), + KeepaliveMessage(KeepaliveMessage), +} + +impl WritablePacket for BGPSubmessage { + fn to_wire(&self, ctx: &ParserContext) -> Result, 
&'static str> { + match &self { + BGPSubmessage::OpenMessage(m) => m.to_wire(ctx), + BGPSubmessage::UpdateMessage(m) => m.to_wire(ctx), + BGPSubmessage::NotificationMessage(m) => m.to_wire(ctx), + BGPSubmessage::KeepaliveMessage(m) => m.to_wire(ctx), + } + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + match &self { + BGPSubmessage::OpenMessage(m) => m.wire_len(ctx), + BGPSubmessage::UpdateMessage(m) => m.wire_len(ctx), + BGPSubmessage::NotificationMessage(m) => m.wire_len(ctx), + BGPSubmessage::KeepaliveMessage(m) => m.wire_len(ctx), + } + } +} + +/// KeepaliveMessage implements the KEEPALIVE message as defined in RFC4271. +#[derive(Debug, PartialEq)] +pub struct KeepaliveMessage {} + +impl ReadablePacket for KeepaliveMessage { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + Ok((buf, KeepaliveMessage {})) + } +} + +impl WritablePacket for KeepaliveMessage { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + Ok(vec![]) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(0) + } +} + +impl Display for KeepaliveMessage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "KeepaliveMessage") + } +} + +/// NotificationMessage implements the NOTIFICATION message type as defined in RFC4271. 
+#[derive(Debug, PartialEq)] +pub struct NotificationMessage { + pub error_code: u8, + pub error_subcode: u8, + pub data: Vec, +} + +impl ReadablePacket for NotificationMessage { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, ec) = be_u8(buf)?; + let (buf, esc) = be_u8(buf)?; + let data = &buf; + Ok(( + &[0u8; 0], + NotificationMessage { + error_code: ec, + error_subcode: esc, + data: data.to_vec(), + }, + )) + } +} + +impl WritablePacket for NotificationMessage { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf = vec![]; + buf.push(self.error_code); + buf.push(self.error_subcode); + buf.extend(self.data.to_owned()); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(2 + self.data.len() as u16) + } +} + +impl Display for NotificationMessage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "NotificationMessage error_code: {}, error_subcode: {}", + self.error_code, self.error_subcode + ) + } +} + +#[derive(Debug, PartialEq)] +pub struct RouteRefreshMessage { + pub afi: AddressFamilyIdentifier, + pub safi: SubsequentAddressFamilyIdentifier, +} + +impl WritablePacket for RouteRefreshMessage { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut res = [0u8; 4]; + byteorder::NetworkEndian::write_u16(&mut res[..2], self.afi.into()); + res[3] = self.safi.into(); + Ok(res.to_vec()) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl ReadablePacket for RouteRefreshMessage { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, (afi, _, safi)) = nom::combinator::complete(nom::sequence::tuple(( + |i| AddressFamilyIdentifier::from_wire(ctx, i), + nom::bytes::complete::take(1u8), + |i| SubsequentAddressFamilyIdentifier::from_wire(ctx, i), + )))(buf)?; + + IResult::Ok((buf, RouteRefreshMessage 
{ afi, safi })) + } +} + +impl Display for RouteRefreshMessage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "RouteRefresh [afi: {}, safi: {}]", self.afi, self.safi) + } +} + +/// BGPMessage is the top level message which is transmitted over the wire. +#[derive(Debug, PartialEq)] +pub struct BGPMessage { + pub msg_type: BGPMessageType, + pub payload: BGPSubmessage, +} + +/// Codec is a helper for serializing and deserializing BGP messages. +pub struct Codec { + pub ctx: ParserContext, +} + +impl Encoder for Codec { + type Error = std::io::Error; + fn encode( + &mut self, + msg: BGPMessage, + buf: &mut BytesMut, + ) -> Result<(), >::Error> { + let result = msg.to_wire(&self.ctx); + match result { + Ok(bytes) => { + // XXX: Copying here because the whole write path needs to be updated + // to take a reference to BytesMut and write to that directly. + let tmp: BytesMut = bytes.as_slice().into(); + buf.put(tmp); + Ok(()) + } + Err(e) => Err(std::io::Error::new(std::io::ErrorKind::Other, e)), + } + } +} + +impl Decoder for Codec { + type Item = BGPMessage; + type Error = std::io::Error; + fn decode( + &mut self, + buf: &mut BytesMut, + ) -> Result::Item>, ::Error> { + // We first check to see if the frame contains the full BGP message before invoking + // the parser on it. + // Expected contents: 16x 0xff, u16 of length. + // The length contains the header length, so we just check that the buf len matches. + if buf.len() < 19 { + // Minimum size is 19: 16 byte marker + 2 byte length + 1 byte type. + return Ok(None); + } + // Read the big-endian length field that follows the 16 byte marker. + let len: u16 = byteorder::BigEndian::read_u16(&buf[16..18]); + if buf.len() < len.into() { + // Not enough data to read this frame. + return Ok(None); + } else if buf.len() == len as usize { + // Exactly one message here, parse and clear buf.
+ let parse_result = BGPMessage::from_wire(&self.ctx, buf.as_ref()); + match parse_result { + Ok(msg) => { + let result = msg.1; + buf.clear(); + Ok(Some(result)) + } + Err(e) => Err(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to parse message: {:?}", e), + )), + } + } else { + // More than one message here, parse and advance buf. + let parse_result = BGPMessage::from_wire(&self.ctx, buf.as_ref()); + match parse_result { + Ok(msg) => { + let result = msg.1; + buf.advance(len as usize); + Ok(Some(result)) + } + Err(e) => Err(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to parse message: {:?}", e), + )), + } + } + } +} + +impl WritablePacket for BGPMessage { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = Vec::new(); + // 16 bytes of 0xff according to Section 4.1 of RFC4271. + buf.append(&mut vec![0xff; 16]); + // Length. + { + let mut tmp: [u8; 2] = [0u8; 2]; + NetworkEndian::write_u16(&mut tmp, self.wire_len(ctx)?); + buf.extend_from_slice(&mut tmp); + } + // Type + buf.push(self.msg_type.into()); + let mut result: Vec = self.payload.to_wire(ctx)?; + buf.append(&mut result); + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + Ok(16 + 2 + 1 + self.payload.wire_len(ctx)?) 
+ } +} + +impl ReadablePacket for BGPMessage { + /// Parses a single BGP message: the 16 byte 0xff marker, the u16 length, the u8 type and a + /// payload of `length - 19` bytes which is decoded according to the type. + /// + /// Returns a `Failure` when the length field is smaller than the 19 byte header or when + /// the message type is not one of OPEN, UPDATE, NOTIFICATION or KEEPALIVE. + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, _) = nom::combinator::complete(nom::bytes::complete::tag(&[0xff; 16]))(buf)?; + let (buf, len) = nom::combinator::complete(be_u16)(buf)?; + let (buf, typ) = nom::combinator::complete(be_u8)(buf)?; + // `len` is supplied by the peer and includes the 19 byte header; reject shorter values + // instead of underflowing (a panic in debug builds, a wrapped length in release builds). + let payload_len = len.checked_sub(19).ok_or_else(|| { + Failure(BGPParserError::CustomText( + "BGP message length is shorter than the 19 byte header".to_string(), + )) + })?; + let (buf, payload_bytes) = nom::bytes::complete::take(payload_len)(buf)?; + let (_, payload) = match typ.into() { + BGPMessageTypeValues::OPEN_MESSAGE => { + let (b, omsg) = OpenMessage::from_wire(ctx, payload_bytes)?; + (b, BGPSubmessage::OpenMessage(omsg)) + } + BGPMessageTypeValues::UPDATE_MESSAGE => { + let (b, umsg) = UpdateMessage::from_wire(ctx, payload_bytes)?; + (b, BGPSubmessage::UpdateMessage(umsg)) + } + BGPMessageTypeValues::NOTIFICATION_MESSAGE => { + let (b, nmsg) = NotificationMessage::from_wire(ctx, payload_bytes)?; + (b, BGPSubmessage::NotificationMessage(nmsg)) + } + BGPMessageTypeValues::KEEPALIVE_MESSAGE => { + let (b, kmsg) = KeepaliveMessage::from_wire(ctx, payload_bytes)?; + (b, BGPSubmessage::KeepaliveMessage(kmsg)) + } + _ => { + return Err(Failure(BGPParserError::CustomText( + "Unknown BGP message type".to_string(), + ))); + } + }; + Ok(( + buf, + BGPMessage { + msg_type: BGPMessageType(typ), + payload, + }, + )) + } +} + +impl Display for BGPMessage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.payload { + BGPSubmessage::OpenMessage(m) => fmt::Display::fmt(&m, f), + BGPSubmessage::UpdateMessage(m) => fmt::Display::fmt(&m, f), + BGPSubmessage::KeepaliveMessage(m) => fmt::Display::fmt(&m, f), + BGPSubmessage::NotificationMessage(m) => fmt::Display::fmt(&m, f), + } + } +} + +#[derive(Debug, PartialEq)] +pub struct OpenMessage { + pub version: u8, + pub asn: u16, + pub hold_time: u16, + pub identifier: Ipv4Addr, + pub options: Vec, +} + +impl ReadablePacket for OpenMessage { + fn from_wire<'a>( + ctx:
&ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], OpenMessage, BGPParserError<&'a [u8]>> { + let (buf, (version, asn, hold_time, identifier)) = + nom::combinator::complete(nom::sequence::tuple((be_u8, be_u16, be_u16, be_u32)))(buf)?; + // oplen, [ [OpenOption] ... ] + // OpenOption = [T, L, V] + let (buf, opts): (_, Vec) = nom::multi::length_value( + be_u8, + nom::multi::many0(|b| OpenOption::from_wire(ctx, b)), + )(buf)?; + Ok(( + buf, + OpenMessage { + version, + asn, + hold_time, + identifier: Ipv4Addr::from(identifier), + options: opts, + }, + )) + } +} + +impl WritablePacket for OpenMessage { + /// Serializes the OPEN body: version (1 byte), ASN (2), hold time (2), identifier (4), + /// the one byte options length, then every option in order. + /// + /// Returns an error when the encoded options do not fit the one byte length field. + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = vec![0; 10]; + buf[0] = self.version; + NetworkEndian::write_u16(&mut buf.as_mut_slice()[1..3], self.asn); + NetworkEndian::write_u16(&mut buf.as_mut_slice()[3..5], self.hold_time); + buf[5..9].clone_from_slice(&self.identifier.octets()); + // The options length is a single octet on the wire, so fail instead of silently + // truncating or wrapping when the options add up to more than 255 bytes. + let mut oplen: u8 = 0; + for opt in &self.options { + buf.append(&mut (*opt).to_wire(ctx)?); + let opt_len: u8 = (*opt) + .wire_len(ctx)? + .try_into() + .map_err(|_| "overflow in options length in OpenMessage")?; + oplen = oplen + .checked_add(opt_len) + .ok_or("overflow in options length in OpenMessage")?; + } + buf[9] = oplen; + Ok(buf) + } + /// Returns the encoded size: the 10 byte fixed part plus the encoded length of each option. + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut count: usize = 10; + for opt in &self.options { + count += (*opt).to_wire(ctx)?.len(); + } + Ok(count + .try_into() + .map_err(|_| "overflow in wire_len in OpenMessage")?) + } +} + +impl Display for OpenMessage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "OpenMessage: [version: {}, asn: {}, hold_time: {}, identifier: {}, options: [", + self.version, self.asn, self.hold_time, self.identifier + )?; + for option in &self.options { + fmt::Display::fmt(option, f)?; + } + write!(f, "]]") + } +} + +/// UPDATE message and subtypes.
+#[derive(Debug, PartialEq)] +pub struct UpdateMessage { + pub withdrawn_nlri: Vec, + pub path_attributes: Vec, + pub announced_nlri: Vec, +} + +impl ReadablePacket for UpdateMessage { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, wd_nlris): (_, Vec) = nom::multi::length_value( + be_u16, + nom::multi::many0(|i| NLRI::from_wire(&ctx.clone(), i)), + )(buf)?; + let (buf, pattrs): (_, Vec) = nom::multi::length_value( + be_u16, + nom::multi::many0(|i| PathAttribute::from_wire(ctx, i)), + )(buf)?; + let (buf, ann_nlri): (_, Vec) = + nom::multi::many0(|i| NLRI::from_wire(&ctx.clone(), i))(buf)?; + Ok(( + buf, + UpdateMessage { + withdrawn_nlri: wd_nlris, + path_attributes: pattrs, + announced_nlri: ann_nlri, + }, + )) + } +} + +impl WritablePacket for UpdateMessage { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = Vec::new(); + let mut tmp: &mut [u8] = &mut [0u8; 2]; + let mut wd_len: u16 = 0; + for wd in &self.withdrawn_nlri { + wd_len += wd.wire_len(ctx)?; + } + NetworkEndian::write_u16(&mut tmp, wd_len); + buf.append(&mut tmp.to_vec()); + for wd in &self.withdrawn_nlri { + buf.extend(wd.to_wire(ctx)?); + } + let mut pattr_len: u16 = 0; + for pattr in &self.path_attributes { + pattr_len += pattr.wire_len(ctx)?; + } + NetworkEndian::write_u16(&mut tmp, pattr_len); + buf.extend(tmp.to_vec()); + for pattr in &self.path_attributes { + buf.extend(pattr.to_wire(ctx)?); + } + for ann in &self.announced_nlri { + buf.extend(ann.to_wire(ctx)?); + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut ctr: u16 = 0; + ctr += 2; + for wd in &self.withdrawn_nlri { + ctr += wd.wire_len(ctx)?; + } + ctr += 2; + for pa in &self.path_attributes { + ctr += pa.wire_len(ctx)?; + } + for ann in &self.announced_nlri { + ctr += ann.wire_len(ctx)?; + } + Ok(ctr) + } +} + +impl Display for UpdateMessage { + fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { + write!(f, "UpdateMessage [ withdrawn: ")?; + for withdrawn_nlri in &self.withdrawn_nlri { + fmt::Display::fmt(withdrawn_nlri, f)?; + } + for announced_nlri in &self.announced_nlri { + fmt::Display::fmt(announced_nlri, f)?; + } + for path_attr in &self.path_attributes { + fmt::Display::fmt(path_attr, f)?; + } + write!(f, " ]") + } +} + +#[cfg(test)] +mod tests { + use super::BGPMessage; + use super::Codec; + use crate::bgp_packet::constants::AddressFamilyIdentifier::Ipv6; + use crate::bgp_packet::messages::AddressFamilyIdentifier::Ipv4; + use crate::bgp_packet::traits::ParserContext; + use crate::bgp_packet::traits::ReadablePacket; + use crate::bgp_packet::traits::WritablePacket; + + use bytes::BufMut; + use tokio_util::codec::{Decoder, Encoder}; + + #[test] + fn test_open_msg() { + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x39, 0x01, 0x04, 0x00, 0x2a, 0x00, 0xb4, 0xd4, 0x19, 0x16, 0x26, + 0x1c, 0x02, 0x06, 0x01, 0x04, 0x00, 0x01, 0x00, 0x01, 0x02, 0x02, 0x80, 0x00, 0x02, + 0x02, 0x02, 0x00, 0x02, 0x02, 0x46, 0x00, 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x00, + 0x2a, + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv4); + let (buf, result) = BGPMessage::from_wire(ctx, open_msg_bytes).unwrap(); + assert_eq!(buf.len(), 0); + + let want_str = "OpenMessage: [version: 4, asn: 42, hold_time: 180, identifier: 212.25.22.38, options: [OpenOption: Capabilities: Capabilities: [MultiprotocolCapbility: [ Ipv4 Unicast ]]OpenOption: Capabilities: Capabilities: [UnknownCapability type: 128]OpenOption: Capabilities: Capabilities: [RouteRefreshCapability]OpenOption: Capabilities: Capabilities: [UnknownCapability type: 70]OpenOption: Capabilities: Capabilities: [FourByteASN: asn: 42]]]"; + assert_eq!(format!("{}", result), want_str); + + let wire: Vec = result.to_wire(ctx).unwrap(); + assert_eq!(wire, open_msg_bytes); + } + + 
#[test] + fn test_open_msg_ipv6() { + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, + 0x18, 0x02, 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, + 0x02, 0x80, 0x00, 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv4); + let (buf, result) = BGPMessage::from_wire(ctx, open_msg_bytes).unwrap(); + assert_eq!(buf.len(), 0); + + let want_str = "OpenMessage: [version: 4, asn: 8758, hold_time: 180, identifier: 212.25.27.45, options: [OpenOption: Capabilities: Capabilities: [MultiprotocolCapbility: [ Ipv6 Unicast ]]OpenOption: Capabilities: Capabilities: [RouteRefreshCapability]OpenOption: Capabilities: Capabilities: [UnknownCapability type: 128]OpenOption: Capabilities: Capabilities: [FourByteASN: asn: 8758]]]"; + assert_eq!(format!("{}", result), want_str); + + let wire: Vec = result.to_wire(ctx).unwrap(); + assert_eq!(wire, open_msg_bytes); + } + + #[test] + fn test_update_msg_simple() { + let update_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x79, 0x02, 0x00, 0x00, 0x00, 0x5e, 0x40, 0x01, 0x01, 0x02, 0x40, + 0x02, 0x16, 0x02, 0x05, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, + 0x73, 0xfb, 0x00, 0x00, 0x05, 0x13, 0x00, 0x00, 0x12, 0x83, 0x40, 0x03, 0x04, 0xb9, + 0x5f, 0xdb, 0x24, 0xc0, 0x08, 0x1c, 0x05, 0x13, 0x88, 0xb8, 0x73, 0xfb, 0x0f, 0xa0, + 0x73, 0xfb, 0x0f, 0xb5, 0x9a, 0x74, 0x0f, 0xa0, 0x9a, 0x74, 0x0f, 0xaa, 0xdf, 0x1e, + 0x07, 0xd0, 0xdf, 0x1e, 0x07, 0xda, 0xc0, 0x20, 0x18, 0x00, 0x00, 0xdf, 0x1e, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0a, 0x18, 0xcb, 0x01, 0x4e, + ]; + let ctx = 
&ParserContext::new().four_octet_asn(true).nlri_mode(Ipv4); + let (buf, result) = BGPMessage::from_wire(ctx, update_msg_bytes).unwrap(); + assert_eq!(buf.len(), 0); + + let want_str = "UpdateMessage [ withdrawn: 203.1.78.0/24Origin: UnknownAS Path: { Segment [ Type: AS_SEGMENT 39540 57118 29691 1299 4739 ]] }NextHop: 185.95.219.36Communities: [ 1299:35000, 29691:4000, 29691:4021, 39540:4000, 39540:4010, 57118:2000, 57118:2010, ] LargeCommunities: [ 57118:20:0, 57118:20:10, ] ]"; + assert_eq!(format!("{}", result), want_str); + + let reencoded = result.to_wire(&ctx).unwrap(); + assert_eq!(&reencoded, update_msg_bytes); + } + + #[test] + fn test_insufficient_decode() { + let update_msg_bytes: &[u8] = &[0xff, 0xff, 0xff, 0xff, 0xff]; + let codec = &mut Codec { + ctx: ParserContext { + four_octet_asn: Some(true), + nlri_mode: Some(Ipv6), + }, + }; + let mut buf = bytes::BytesMut::from(update_msg_bytes); + let result = codec.decode(&mut buf); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); + assert_eq!(buf.len(), 5); + } + + #[test] + fn test_exact_decode_encode() { + let update_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x79, 0x02, 0x00, 0x00, 0x00, 0x5e, 0x40, 0x01, 0x01, 0x02, 0x40, + 0x02, 0x16, 0x02, 0x05, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, + 0x73, 0xfb, 0x00, 0x00, 0x05, 0x13, 0x00, 0x00, 0x12, 0x83, 0x40, 0x03, 0x04, 0xb9, + 0x5f, 0xdb, 0x24, 0xc0, 0x08, 0x1c, 0x05, 0x13, 0x88, 0xb8, 0x73, 0xfb, 0x0f, 0xa0, + 0x73, 0xfb, 0x0f, 0xb5, 0x9a, 0x74, 0x0f, 0xa0, 0x9a, 0x74, 0x0f, 0xaa, 0xdf, 0x1e, + 0x07, 0xd0, 0xdf, 0x1e, 0x07, 0xda, 0xc0, 0x20, 0x18, 0x00, 0x00, 0xdf, 0x1e, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0a, 0x18, 0xcb, 0x01, 0x4e, + ]; + let codec = &mut Codec { + ctx: ParserContext { + four_octet_asn: Some(true), + nlri_mode: Some(Ipv6), + }, + }; + let 
mut buf = bytes::BytesMut::from(update_msg_bytes); + let result = codec.decode(&mut buf).unwrap(); + assert!(result.is_some()); + assert_eq!(buf.len(), 0); + codec.encode(result.unwrap(), &mut buf).unwrap(); + print!("Output bytes: "); + for b in &buf { + print!("0x{:02x}, ", b); + } + assert_eq!(buf.as_ref(), update_msg_bytes.as_ref()); + } + + #[test] + fn test_multi_msg_codec_decode() { + let update_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x79, 0x02, 0x00, 0x00, 0x00, 0x5e, 0x40, 0x01, 0x01, 0x02, 0x40, + 0x02, 0x16, 0x02, 0x05, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, + 0x73, 0xfb, 0x00, 0x00, 0x05, 0x13, 0x00, 0x00, 0x12, 0x83, 0x40, 0x03, 0x04, 0xb9, + 0x5f, 0xdb, 0x24, 0xc0, 0x08, 0x1c, 0x05, 0x13, 0x88, 0xb8, 0x73, 0xfb, 0x0f, 0xa0, + 0x73, 0xfb, 0x0f, 0xb5, 0x9a, 0x74, 0x0f, 0xa0, 0x9a, 0x74, 0x0f, 0xaa, 0xdf, 0x1e, + 0x07, 0xd0, 0xdf, 0x1e, 0x07, 0xda, 0xc0, 0x20, 0x18, 0x00, 0x00, 0xdf, 0x1e, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x0a, 0x18, 0xcb, 0x01, 0x4e, + // Add part of a second message which is incomplete + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, + ]; + let codec = &mut Codec { + ctx: ParserContext { + four_octet_asn: Some(true), + nlri_mode: Some(Ipv6), + }, + }; + let mut buf = bytes::BytesMut::from(update_msg_bytes); + let result = codec.decode(&mut buf); + assert!(result.is_ok()); + assert!(result.unwrap().is_some()); + assert_eq!(buf.len(), 17); + // Add the rest of the message into buf. 
+ buf.put_slice(&[ + 0x79, 0x02, 0x00, 0x00, 0x00, 0x5e, 0x40, 0x01, 0x01, 0x02, 0x40, 0x02, 0x16, 0x02, + 0x05, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x73, 0xfb, 0x00, + 0x00, 0x05, 0x13, 0x00, 0x00, 0x12, 0x83, 0x40, 0x03, 0x04, 0xb9, 0x5f, 0xdb, 0x24, + 0xc0, 0x08, 0x1c, 0x05, 0x13, 0x88, 0xb8, 0x73, 0xfb, 0x0f, 0xa0, 0x73, 0xfb, 0x0f, + 0xb5, 0x9a, 0x74, 0x0f, 0xa0, 0x9a, 0x74, 0x0f, 0xaa, 0xdf, 0x1e, 0x07, 0xd0, 0xdf, + 0x1e, 0x07, 0xda, 0xc0, 0x20, 0x18, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x00, 0x14, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, + 0x00, 0x0a, 0x18, 0xcb, 0x01, 0x4e, + ]); + let result2 = codec.decode(&mut buf); + assert!(result2.is_ok()); + assert!(result2.unwrap().is_some()); + assert_eq!(buf.len(), 0); + } +} diff --git a/bgpd/src/bgp_packet/mod.rs b/bgpd/src/bgp_packet/mod.rs new file mode 100644 index 0000000..e65a998 --- /dev/null +++ b/bgpd/src/bgp_packet/mod.rs @@ -0,0 +1,26 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Implements parsers / serializers for the Border Gateway Protocol wire format. +//! 
RFC4271 + +// Meta +pub mod constants; +pub mod traits; + +// Parsers +pub mod capabilities; +pub mod messages; +pub mod nlri; +pub mod path_attributes; diff --git a/bgpd/src/bgp_packet/nlri.rs b/bgpd/src/bgp_packet/nlri.rs new file mode 100644 index 0000000..065ccbf --- /dev/null +++ b/bgpd/src/bgp_packet/nlri.rs @@ -0,0 +1,353 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use crate::bgp_packet::traits::BGPParserError; +use crate::bgp_packet::traits::ParserContext; +use crate::bgp_packet::traits::ReadablePacket; +use crate::bgp_packet::traits::WritablePacket; +use nom::bytes::complete::take; +use nom::number::complete::be_u8; +use nom::Err::Failure; +use nom::IResult; +use serde::Serialize; +use std::convert::TryFrom; +use std::convert::TryInto; +use std::fmt; +use std::io::ErrorKind; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::str::FromStr; + +// NLRI here is the Neighbor Link Reachability Information from RFC 4271. +// Other NLRIs such as MP Reach NLRI are implemented as path attributes. +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Hash)] +pub struct NLRI { + pub afi: AddressFamilyIdentifier, + pub prefixlen: u8, + pub prefix: Vec, +} + +impl NLRI { + pub fn from_bytes( + afi: AddressFamilyIdentifier, + prefix: Vec, + prefixlen: u8, + ) -> Result { + // Check that the vector has enough bytes to represent the prefix. 
+ if prefix.len() < ((prefixlen + 7) / 8).into() { + return Err(format!( + "Prefix: {:?}/{} does not have enough bytes in prefix for given prefixlen", + prefix, prefixlen + )); + } + Ok(NLRI { + afi, + prefixlen, + prefix, + }) + } +} + +impl ReadablePacket for NLRI { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + // plen is the length in bits of the address. + let (buf, prefixlen) = be_u8(buf)?; + let octet_len = (prefixlen + 7) / 8; + let (buf, prefix) = take(octet_len)(buf)?; + + match ctx.nlri_mode { + None => { + return Err(Failure(BGPParserError::CustomText( + "nlri_mode not set in the context for NLRI::from_wire".to_string(), + ))); + } + Some(afi) => Ok(( + buf, + NLRI { + afi, + prefixlen, + prefix: prefix.to_vec(), + }, + )), + } + } +} + +impl TryFrom for Ipv6Addr { + type Error = String; + + fn try_from(value: NLRI) -> Result { + match value.afi { + AddressFamilyIdentifier::Ipv6 => { + let mut v: [u8; 16] = [0u8; 16]; + if value.prefix.len() > v.len() { + return Err("prefix length greater than IPv6 address length".to_string()); + } + for (pos, e) in value.prefix.iter().enumerate() { + v[pos] = *e; + } + let ip6: Ipv6Addr = v.into(); + Ok(ip6) + } + _ => Err("Unsupported AFI type".to_string()), + } + } +} + +impl TryFrom for Ipv4Addr { + type Error = String; + + fn try_from(value: NLRI) -> Result { + match value.afi { + AddressFamilyIdentifier::Ipv4 => { + let mut v: [u8; 4] = [0u8; 4]; + if value.prefix.len() > v.len() { + return Err("prefix length greater than IPv4 address length".to_string()); + } + for (pos, e) in value.prefix.iter().enumerate() { + v[pos] = *e; + } + let ip4 = Ipv4Addr::new(v[0], v[1], v[2], v[3]); + Ok(ip4) + } + _ => Err("Unsupported AFI type".to_string()), + } + } +} + +impl TryInto for NLRI { + type Error = std::io::Error; + fn try_into(self) -> Result { + match self.afi { + AddressFamilyIdentifier::Ipv4 => { + let mut v: [u8; 4] = [0u8; 4]; + if 
self.prefix.len() > v.len() { + return Err(std::io::Error::new( + ErrorKind::InvalidData, + "prefix length greater than IPv4 address length", + )); + } + for (pos, e) in self.prefix.iter().enumerate() { + v[pos] = *e; + } + let ip4 = Ipv4Addr::new(v[0], v[1], v[2], v[3]); + Ok(IpAddr::V4(ip4)) + } + AddressFamilyIdentifier::Ipv6 => { + let mut v: [u8; 16] = [0u8; 16]; + if self.prefix.len() > v.len() { + return Err(std::io::Error::new( + ErrorKind::InvalidData, + "prefix length greater than IPv6 address length", + )); + } + for (pos, e) in self.prefix.iter().enumerate() { + v[pos] = *e; + } + let ip6: Ipv6Addr = v.into(); + Ok(IpAddr::V6(ip6)) + } + } + } +} + +impl TryFrom for NLRI { + type Error = String; + fn try_from(value: String) -> Result { + let parts: Vec<&str> = value.split("/").collect(); + if parts.len() != 2 { + return Err(format!("Expected ip_addr/prefixlen but got: {}", value)); + } + + let prefixlen: u8 = u8::from_str(parts[1]).map_err(|_| "failed to parse prefixlen")?; + let mut octets: Vec; + let afi: AddressFamilyIdentifier; + + if parts[0].contains(":") { + afi = AddressFamilyIdentifier::Ipv6; + let addr: Ipv6Addr = Ipv6Addr::from_str(parts[0]).map_err(|e| e.to_string())?; + octets = addr.octets().to_vec(); + } else if parts[0].contains(".") { + afi = AddressFamilyIdentifier::Ipv4; + let addr: Ipv4Addr = Ipv4Addr::from_str(parts[0]).map_err(|e| e.to_string())?; + octets = addr.octets().to_vec(); + } else { + return Err(format!("Could not detect IP address type: {}", parts[0])); + } + + // Truncate octets to prefixlen + if prefixlen % 8 == 0 { + // Cleanly truncate. 
+ octets.truncate((prefixlen / 8).into()); + } else { + let num_bytes = (prefixlen / 8) + 1; + let mask = u8::MAX << (8 - (prefixlen % 8)); + octets.truncate(num_bytes.into()); + if octets.len() > 0 { + let last_pos = octets.len() - 1; + octets[last_pos] &= mask; + } + } + + Ok(NLRI { + afi, + prefixlen, + prefix: octets, + }) + } +} + +impl WritablePacket for NLRI { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = Vec::new(); + buf.push(self.prefixlen); + buf.extend(self.prefix.as_slice()); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(1 + self.prefix.len() as u16) + } +} + +impl fmt::Display for NLRI { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.afi { + AddressFamilyIdentifier::Ipv4 => { + let bytes = &mut self.prefix.clone(); + if bytes.len() < 4 { + bytes.extend(std::iter::repeat(0).take(4 - bytes.len())); + } + let four_bytes: [u8; 4] = bytes.as_slice().try_into().map_err(|_| fmt::Error {})?; + let ipv4_addr = Ipv4Addr::from(four_bytes); + write!(f, "{}/{}", ipv4_addr, self.prefixlen) + } + AddressFamilyIdentifier::Ipv6 => { + let bytes = &mut self.prefix.clone(); + if bytes.len() < 16 { + bytes.extend(std::iter::repeat(0).take(16 - bytes.len())); + } + let sixteen_bytes: [u8; 16] = + bytes.as_slice().try_into().map_err(|_| fmt::Error {})?; + let ipv6_addr = Ipv6Addr::from(sixteen_bytes); + write!(f, "{}/{}", ipv6_addr, self.prefixlen) + } + } + } +} + +#[cfg(test)] +mod tests { + use std::convert::TryFrom; + + use super::NLRI; + use crate::bgp_packet::constants::AddressFamilyIdentifier::{Ipv4, Ipv6}; + use crate::bgp_packet::traits::ParserContext; + use crate::bgp_packet::traits::ReadablePacket; + use crate::bgp_packet::traits::WritablePacket; + + #[test] + fn test_basic_nlri_v6() { + let nlri_bytes: &[u8] = &[0x20, 0x20, 0x01, 0xdb, 0x8]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let nlri_res: (&[u8], NLRI) = NLRI::from_wire(ctx, 
nlri_bytes).unwrap(); + assert_eq!(nlri_res.1.afi, Ipv6); + assert_eq!(nlri_res.1.prefixlen, 32); + assert_eq!(nlri_res.1.prefix, vec![0x20, 0x01, 0xdb, 0x8]); + assert_eq!(nlri_res.0.len(), 0); + + let wire: Vec = nlri_res.1.to_wire(ctx).unwrap(); + assert_eq!(wire.as_slice(), nlri_bytes); + assert_eq!(nlri_res.1.wire_len(ctx).unwrap() as usize, wire.len()); + } + + #[test] + fn test_basic_nlri_v4() { + let nlri_bytes: &[u8] = &[0x18, 192, 168, 1]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv4); + let nlri_res: (&[u8], NLRI) = NLRI::from_wire(ctx, nlri_bytes).unwrap(); + assert_eq!(nlri_res.1.afi, Ipv4); + assert_eq!(nlri_res.1.prefixlen, 24); + assert_eq!(nlri_res.1.prefix, vec![192, 168, 1]); + assert_eq!(nlri_res.0.len(), 0); + + let wire: Vec = nlri_res.1.to_wire(ctx).unwrap(); + assert_eq!(wire.as_slice(), nlri_bytes); + assert_eq!(nlri_res.1.wire_len(ctx).unwrap() as usize, wire.len()); + } + + #[test] + fn test_string_roundtrip() { + let cases: Vec<(String, Vec, u8, String)> = vec![ + ( + "2001:db8::/32".into(), + vec![0x20, 0x01, 0xd, 0xb8], + 32, + "2001:db8::/32".into(), + ), + ( + "2001:db8::1/16".into(), + vec![0x20, 0x01], + 16, + "2001::/16".into(), + ), + ( + "2001:db8::/64".into(), + vec![0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0], + 64, + "2001:db8::/64".into(), + ), + ( + "2001:db8::/24".into(), + vec![0x20, 0x01, 0xd], + 24, + "2001:d00::/24".into(), + ), + ("2001:db8::/0".into(), vec![], 0, "::/0".into()), + ("::/0".into(), vec![], 0, "::/0".into()), + ("10.0.0.0/8".into(), vec![10], 8, "10.0.0.0/8".into()), + ]; + + for (i, case) in cases.iter().enumerate() { + let parsed_nlri = NLRI::try_from(case.0.clone()).unwrap(); + assert_eq!(parsed_nlri.prefix, case.1, "Check prefix match ({})", i); + assert_eq!( + parsed_nlri.prefixlen, case.2, + "Check prefixlen match ({})", + i + ); + assert_eq!( + case.3, + format!("{}", parsed_nlri), + "Check std::fmt::Display match ({})", + i + ); + } + } + + // #[test] + // fn 
test_to_string_invalids() { + // let invalid_v4 = NLRI { + // afi: AddressFamilyIdentifier::Ipv4, + // prefix: vec![1, 2, 3, 4, 5], + // prefixlen: 16, + // }; + // assert_eq!( + // "a formatting trait implementation returned an error: Error", + // format!("{}", invalid_v4) + // ); + // } +} diff --git a/bgpd/src/bgp_packet/path_attributes.rs b/bgpd/src/bgp_packet/path_attributes.rs new file mode 100644 index 0000000..2870295 --- /dev/null +++ b/bgpd/src/bgp_packet/path_attributes.rs @@ -0,0 +1,1226 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use crate::bgp_packet::constants::SubsequentAddressFamilyIdentifier; +use crate::bgp_packet::nlri::NLRI; +use crate::bgp_packet::traits::BGPParserError; +use crate::bgp_packet::traits::ParserContext; +use crate::bgp_packet::traits::ReadablePacket; +use crate::bgp_packet::traits::WritablePacket; +use byteorder::ByteOrder; +use byteorder::NetworkEndian; +use nom::number::complete::{be_u16, be_u32, be_u8}; +use nom::Err::Failure; +use nom::IResult; +use serde::Serialize; +use std::convert::TryInto; +use std::fmt; +use std::net::Ipv4Addr; +use std::net::Ipv6Addr; + +/// PathAttribute represents path attributes in a BGP Update message. 
+#[derive(Debug, PartialEq, Clone, Serialize)] +pub enum PathAttribute { + OriginPathAttribute(OriginPathAttribute), + ASPathAttribute(ASPathAttribute), + NextHopPathAttribute(NextHopPathAttribute), + MultiExitDiscPathAttribute(MultiExitDiscPathAttribute), + LocalPrefPathAttribute(LocalPrefPathAttribute), + AtomicAggregatePathAttribute(AtomicAggregatePathAttribute), + AggregatorPathAttribute(AggregatorPathAttribute), + CommunitiesPathAttribute(CommunitiesPathAttribute), + ExtendedCommunitiesPathAttribute(ExtendedCommunitiesPathAttribute), + LargeCommunitiesPathAttribute(LargeCommunitiesPathAttribute), + MPReachNLRIPathAttribute(MPReachNLRIPathAttribute), + MPUnreachNLRIPathAttribute(MPUnreachNLRIPathAttribute), + UnknownPathAttribute(Vec), +} + +const PATH_ATTRIBUTE_FLAG_OPTONAL: u8 = 0x80; // when set to 1: optional, well-known: 0. +const PATH_ATTRIBUTE_FLAG_TRANSITIVE: u8 = 0x40; // when set to 1: transitive, non-transitive: 0. +const _PATH_ATTRIBUTE_FLAG_PARTIAL: u8 = 0x20; // when set to 1: partial, complete: 0. +const PATH_ATTRIBUTE_EXTENDED_LENGTH: u8 = 0x10; // when set to 1: length is u16, otherwise when 0 length is u8. + // For well known attributes the transitive bit MUST be set to 1. + +// Write the type, length and call the child serializer +impl WritablePacket for PathAttribute { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + Ok(match self { + PathAttribute::OriginPathAttribute(a) => { + let typ: u8 = 1; + let flag: u8 = PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::ASPathAttribute(a) => { + let typ: u8 = 2; + let flag: u8 = PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::NextHopPathAttribute(a) => { + let typ: u8 = 3; + let flag: u8 = PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? 
as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::MultiExitDiscPathAttribute(a) => { + let typ: u8 = 4; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::LocalPrefPathAttribute(a) => { + let typ: u8 = 5; + let flag: u8 = 0; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::AtomicAggregatePathAttribute(a) => { + let typ: u8 = 6; + let flag: u8 = 0; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::AggregatorPathAttribute(a) => { + let typ: u8 = 7; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL | PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::CommunitiesPathAttribute(a) => { + let typ: u8 = 8; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL | PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::MPReachNLRIPathAttribute(a) => { + let typ: u8 = 14; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::MPUnreachNLRIPathAttribute(a) => { + let typ: u8 = 15; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::ExtendedCommunitiesPathAttribute(a) => { + let typ: u8 = 16; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL | PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::LargeCommunitiesPathAttribute(a) => { + let typ: u8 = 32; + let flag: u8 = PATH_ATTRIBUTE_FLAG_OPTONAL | PATH_ATTRIBUTE_FLAG_TRANSITIVE; + let len: u8 = a.wire_len(ctx)? 
as u8; + [vec![flag, typ, len], a.to_wire(ctx)?].concat() + } + PathAttribute::UnknownPathAttribute(u) => u.to_vec(), + }) + } + + fn wire_len(&self, ctx: &ParserContext) -> Result { + Ok(match self { + PathAttribute::OriginPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::ASPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::NextHopPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::MultiExitDiscPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::LocalPrefPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::AtomicAggregatePathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::AggregatorPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::CommunitiesPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::MPReachNLRIPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::MPUnreachNLRIPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::ExtendedCommunitiesPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::LargeCommunitiesPathAttribute(a) => 3 + a.wire_len(ctx)?, + PathAttribute::UnknownPathAttribute(u) => u.len() as u16, + }) + } +} + +// Read the type, length and dispatch accordingly. +impl ReadablePacket for PathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, flag) = be_u8(buf)?; + let (buf, typ) = be_u8(buf)?; + let mut len8 = 0; // to preserve the 1 octet length for the unknown option. + let (buf, len) = match flag & PATH_ATTRIBUTE_EXTENDED_LENGTH { + PATH_ATTRIBUTE_EXTENDED_LENGTH => be_u16(buf)?, + _ => { + let (b, t) = be_u8(buf)?; + len8 = t; + (b, t as u16) + } + }; + // Explicitly read the attribute here and pass the attribute only buffer to the child parser. 
+ let (buf, pa_buf) = nom::bytes::complete::take(len)(buf)?; + let (_, res): (_, PathAttribute) = match typ { + 1 => { + let (b, r) = OriginPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::OriginPathAttribute(r)) + } + 2 => { + let (b, r) = ASPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::ASPathAttribute(r)) + } + 3 => { + let (b, r) = NextHopPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::NextHopPathAttribute(r)) + } + 4 => { + let (b, r) = MultiExitDiscPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::MultiExitDiscPathAttribute(r)) + } + 5 => { + let (b, r) = LocalPrefPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::LocalPrefPathAttribute(r)) + } + 6 => { + let (b, r) = AtomicAggregatePathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::AtomicAggregatePathAttribute(r)) + } + 7 => { + let (b, r) = AggregatorPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::AggregatorPathAttribute(r)) + } + 8 => { + let (b, r) = CommunitiesPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::CommunitiesPathAttribute(r)) + } + 14 => { + let (b, r) = MPReachNLRIPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::MPReachNLRIPathAttribute(r)) + } + 15 => { + let (b, r) = MPUnreachNLRIPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::MPUnreachNLRIPathAttribute(r)) + } + 16 => { + let (b, r) = ExtendedCommunitiesPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::ExtendedCommunitiesPathAttribute(r)) + } + 32 => { + let (b, r) = LargeCommunitiesPathAttribute::from_wire(ctx, pa_buf)?; + (b, PathAttribute::LargeCommunitiesPathAttribute(r)) + } + _ => { + let mut tmp = vec![flag, typ]; + if len8 != 0 { + tmp.push(len8); + } else { + let mut t = [0u8; 2]; + byteorder::NetworkEndian::write_u16(&mut t, len); + tmp.append(&mut t.to_vec()); + } + tmp.extend(pa_buf.to_vec()); + (&[], PathAttribute::UnknownPathAttribute(tmp)) + } + }; + Ok((buf, res)) + } +} + +impl fmt::Display for 
PathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PathAttribute::OriginPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::ASPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::NextHopPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::MultiExitDiscPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::LocalPrefPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::AtomicAggregatePathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::AggregatorPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::CommunitiesPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::MPReachNLRIPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::MPUnreachNLRIPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::ExtendedCommunitiesPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::LargeCommunitiesPathAttribute(a) => std::fmt::Display::fmt(&a, f), + PathAttribute::UnknownPathAttribute(a) => { + write!(f, "unknown PathAttribute, bytes: {:?}", a) + } + } + } +} + +// Path attribute implementations. + +/// Origin path attribute is a mandatory attribute defined in RFC4271. 
+#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct OriginPathAttribute(pub u8); + +pub mod origin_path_attribute_values { + use super::OriginPathAttribute; + + pub const IGP: OriginPathAttribute = OriginPathAttribute(0); + pub const EGP: OriginPathAttribute = OriginPathAttribute(1); + pub const UNKNOWN: OriginPathAttribute = OriginPathAttribute(2); +} + +impl ReadablePacket for OriginPathAttribute { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, opa) = be_u8(buf)?; + Ok((buf, OriginPathAttribute(opa))) + } +} + +impl WritablePacket for OriginPathAttribute { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + Ok(vec![self.0]) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(1) + } +} + +impl fmt::Display for OriginPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use origin_path_attribute_values::*; + match self { + &IGP => write!(f, "Origin: IGP"), + &EGP => write!(f, "Origin: EGP"), + &UNKNOWN => write!(f, "Origin: Unknown"), + _ => write!(f, "Origin: invalid value"), + } + } +} + +/// ASPathAttribute is a well-known mandatory attribute that contains a list of TLV encoded path +/// segments. Type is either 1 for AS_SET or 2 for AS_SEQUENCE, length is a 1 octet field +/// containing the number of ASNS and the value contains the ASNs. This is defined in Section 4.3 +/// of RFC4271. + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct ASPathAttribute { + pub segments: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct ASPathSegment { + /// ordered is true when representing an AS_SEQUENCE, andd false when + /// representing an AS_SET. + pub ordered: bool, + /// Path is the list of ASNs. 
+ pub path: Vec, +} + +impl ASPathAttribute { + pub fn from_asns(asns: Vec) -> PathAttribute { + let segment = ASPathSegment { + ordered: true, + path: asns, + }; + PathAttribute::ASPathAttribute(ASPathAttribute { + segments: vec![segment], + }) + } +} + +impl ReadablePacket for ASPathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let parse_segment = |ctx: &ParserContext, + buf: &'a [u8]| + -> IResult<&'a [u8], ASPathSegment, BGPParserError<&'a [u8]>> { + let (buf, typ) = be_u8(buf)?; + let (buf, len) = be_u8(buf)?; + let (buf, asns): (_, Vec) = match ctx.four_octet_asn { + Some(true) => nom::multi::many_m_n(len as usize, len as usize, be_u32)(buf)?, + Some(false) => { + let (buf, asn_u16) = + nom::multi::many_m_n(len as usize, len as usize, be_u16)(buf)?; + let mut asn_u32: Vec = Vec::new(); + for asn in asn_u16 { + asn_u32.push(asn as u32); + } + (buf, asn_u32) + } + None => { + return Err(Failure(BGPParserError::CustomText( + "Can't parse ASPath without four_octet_asn being set".to_owned(), + ))); + } + }; + + Ok(( + buf, + ASPathSegment { + ordered: (typ == 2), + path: asns, + }, + )) + }; + + let (buf, segments): (_, Vec) = + nom::multi::many0(|buf: &'a [u8]| parse_segment(ctx, buf))(buf)?; + + Ok((buf, ASPathAttribute { segments })) + } +} + +impl WritablePacket for ASPathAttribute { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + if !ctx.four_octet_asn.unwrap_or(false) { + return Err( + "Can't use ASPathAttribute to communicate with legacy peer, use AS4PathAttribute", + ); + } + let mut wire: Vec = Vec::new(); + + for segment in &self.segments { + wire.push(if segment.ordered { 2 } else { 1 }); + wire.push( + segment + .path + .len() + .try_into() + .map_err(|_| "ASPath segment too long")?, + ); + for asn in &segment.path { + let mut tmp: Vec = vec![0u8; 4]; + NetworkEndian::write_u32(&mut tmp, *asn); + wire.append(&mut tmp); + } + } + Ok(wire) + } + 
fn wire_len(&self, _: &ParserContext) -> Result { + let mut ctr: u16 = 0; + for segment in &self.segments { + ctr += 2; + ctr += (4 * segment.path.len()) as u16; + } + Ok(ctr) + } +} + +impl fmt::Display for ASPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "AS Path: {{ ")?; + for segment in &self.segments { + write!(f, "Segment [ ")?; + if segment.ordered { + write!(f, "Type: AS_SEGMENT ")? + } else { + write!(f, "Type: AS_SET ")? + }; + for asn in &segment.path { + write!(f, "{} ", asn)?; + } + write!(f, " ]")?; + } + write!(f, "] }}") + } +} + +// TODO: AS4 path attribute +// Per RFC 6793 the AS4 path attribute is for legacy BGP speakers to propagate +// 4 octet ASNs in update messages. +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct AS4PathAttribute { + pub ordered: bool, + pub path: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct NextHopPathAttribute(pub Ipv4Addr); + +impl ReadablePacket for NextHopPathAttribute { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (_, ip_u32) = be_u32(buf)?; + let nexthop = Ipv4Addr::from(ip_u32); + Ok((buf, NextHopPathAttribute(nexthop))) + } +} + +impl WritablePacket for NextHopPathAttribute { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + return Ok(self.0.octets().to_vec()); + } + fn wire_len(&self, _: &ParserContext) -> Result { + return Ok(4); + } +} + +impl fmt::Display for NextHopPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NextHop: {}", self.0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct MultiExitDiscPathAttribute(pub u32); + +impl ReadablePacket for MultiExitDiscPathAttribute { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, val) = be_u32(buf)?; + Ok((buf, MultiExitDiscPathAttribute(val))) + } +} 
+ +impl WritablePacket for MultiExitDiscPathAttribute { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = vec![0u8; 4]; + byteorder::NetworkEndian::write_u32(&mut buf, self.0); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl fmt::Display for MultiExitDiscPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "MultiExitDisc: {}", self.0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct LocalPrefPathAttribute(pub u32); + +impl ReadablePacket for LocalPrefPathAttribute { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, val) = be_u32(buf)?; + Ok((buf, LocalPrefPathAttribute(val))) + } +} + +impl WritablePacket for LocalPrefPathAttribute { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf: Vec = vec![0u8; 4]; + byteorder::NetworkEndian::write_u32(&mut buf, self.0); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl fmt::Display for LocalPrefPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LocalPref: {}", self.0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct AtomicAggregatePathAttribute {} + +impl ReadablePacket for AtomicAggregatePathAttribute { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + Ok((buf, AtomicAggregatePathAttribute {})) + } +} + +impl WritablePacket for AtomicAggregatePathAttribute { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + Ok(vec![]) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(0) + } +} + +impl fmt::Display for AtomicAggregatePathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "AtomicAggregate: present") + } +} + +#[derive(Debug, PartialEq, Eq, Clone, 
Serialize)] +pub struct AggregatorPathAttribute { + pub asn: u32, + pub ip: Ipv4Addr, +} + +// TODO: Support non AS4 peers. +impl ReadablePacket for AggregatorPathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + if !ctx.four_octet_asn.is_some() { + return Err(Failure(BGPParserError::CustomText( + "Non four byte ASN not supported (AggregatorPathAttribute from_wire)".to_string(), + ))); + } + let (buf, asn) = be_u32(buf)?; + let (buf, ip) = nom::bytes::complete::take(4u8)(buf)?; + let correct: [u8; 4] = ip.try_into().expect("wrong slice len"); + Ok(( + buf, + AggregatorPathAttribute { + asn, + ip: Ipv4Addr::from(correct), + }, + )) + } +} + +impl WritablePacket for AggregatorPathAttribute { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + if !ctx.four_octet_asn.is_some() { + panic!("Non four byte ASN not supported (AggregatorPathAttribute from_wire)"); + } + let mut buf: Vec = vec![0u8; 4]; + byteorder::NetworkEndian::write_u32(&mut buf, self.asn); + buf.extend(self.ip.octets().to_vec()); + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + if !ctx.four_octet_asn.is_some() { + panic!("Non four byte ASN not supported (AggregatorPathAttribute from_wire)"); + } + Ok(8) + } +} + +impl fmt::Display for AggregatorPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Aggregator: asn: {}, ip: {}", self.asn, self.ip) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct CommunitiesPathAttribute { + pub values: Vec, +} + +impl ReadablePacket for CommunitiesPathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, values): (_, Vec) = + nom::multi::many0(|i| CommunitiesPayload::from_wire(ctx, i))(buf)?; + Ok((buf, CommunitiesPathAttribute { values })) + } +} + +impl WritablePacket for CommunitiesPathAttribute { + fn 
to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf = vec![]; + for val in &self.values { + buf.extend(val.to_wire(ctx)?); + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut ttl: u16 = 0; + for val in &self.values { + ttl += val.wire_len(ctx)?; + } + Ok(ttl) + } +} + +impl fmt::Display for CommunitiesPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Communities: [ ")?; + for c in &self.values { + write!(f, " {}, ", c)?; + } + write!(f, " ] ") + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct CommunitiesPayload { + pub asn: u16, + pub payload: u16, +} + +impl ReadablePacket for CommunitiesPayload { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, asn): (_, u16) = be_u16(buf)?; + let (buf, payload): (_, u16) = be_u16(buf)?; + Ok((buf, CommunitiesPayload { asn, payload })) + } +} + +impl WritablePacket for CommunitiesPayload { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf = vec![0u8; 4]; + byteorder::NetworkEndian::write_u16(&mut buf[0..2], self.asn); + byteorder::NetworkEndian::write_u16(&mut buf[2..4], self.payload); + Ok(buf) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(4) + } +} + +impl fmt::Display for CommunitiesPayload { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.asn, self.payload) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct ExtendedCommunitiesPathAttribute { + pub t_high: u8, + // TODO: Handle t_low and subtypes of the Extended Communities attribute as defined in rfc4360. 
+ pub value: Vec, +} + +impl ReadablePacket for ExtendedCommunitiesPathAttribute { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, t_high) = be_u8(buf)?; + let (buf, value) = nom::bytes::complete::take(7u8)(buf)?; + Ok(( + buf, + ExtendedCommunitiesPathAttribute { + t_high, + value: value.to_vec(), + }, + )) + } +} + +impl WritablePacket for ExtendedCommunitiesPathAttribute { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + if !self.value.len() == 7 { + return Err("ExtendedCommunitiesPathAttribute value length != 7"); + } + Ok(vec![vec![self.t_high], self.value.to_owned()].concat()) + } + fn wire_len(&self, _: &ParserContext) -> Result { + Ok(8) + } +} + +impl fmt::Display for ExtendedCommunitiesPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ExtendedCommunities: {} {:?}", self.t_high, self.value) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct LargeCommunitiesPathAttribute { + pub values: Vec, +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct LargeCommunitiesPayload { + pub global_admin: u32, + pub ld1: u32, + pub ld2: u32, +} + +impl ReadablePacket for LargeCommunitiesPayload { + fn from_wire<'a>( + _: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, global_admin) = be_u32(buf)?; + let (buf, ld1) = be_u32(buf)?; + let (buf, ld2) = be_u32(buf)?; + Ok(( + buf, + LargeCommunitiesPayload { + global_admin, + ld1, + ld2, + }, + )) + } +} + +impl WritablePacket for LargeCommunitiesPayload { + fn to_wire(&self, _: &ParserContext) -> Result, &'static str> { + let mut buf = vec![0u8; 12]; + byteorder::NetworkEndian::write_u32(&mut buf[0..4], self.global_admin); + byteorder::NetworkEndian::write_u32(&mut buf[4..8], self.ld1); + byteorder::NetworkEndian::write_u32(&mut buf[8..12], self.ld2); + Ok(buf) + } + fn wire_len(&self, _: 
&ParserContext) -> Result { + Ok(12) + } +} + +impl fmt::Display for LargeCommunitiesPayload { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}:{}", self.global_admin, self.ld1, self.ld2) + } +} + +impl ReadablePacket for LargeCommunitiesPathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, values): (_, Vec) = + nom::multi::many0(|i| LargeCommunitiesPayload::from_wire(ctx, i))(buf)?; + Ok((buf, LargeCommunitiesPathAttribute { values })) + } +} + +impl WritablePacket for LargeCommunitiesPathAttribute { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf = vec![]; + for val in &self.values { + buf.extend(val.to_wire(ctx)?); + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut ttl: u16 = 0; + for val in &self.values { + ttl += val.wire_len(ctx)?; + } + Ok(ttl) + } +} + +impl fmt::Display for LargeCommunitiesPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LargeCommunities: [")?; + for c in &self.values { + write!(f, " {}, ", c)?; + } + write!(f, "]") + } +} + +/// MPReachPathAattribute implements the MultiProtocol extensions to BGP (RFC4760) +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct MPReachNLRIPathAttribute { + pub afi: AddressFamilyIdentifier, + pub safi: SubsequentAddressFamilyIdentifier, + pub nexthop: Vec, + pub nlris: Vec, +} + +impl MPReachNLRIPathAttribute { + // https://datatracker.ietf.org/doc/html/rfc2545 describes what the nexthop + // field can contain. 
Returns a tuple of (global_nh, linklocal_nh) + pub fn nexthop_to_v6(self) -> Option<(Ipv6Addr, Option)> { + return match self.nexthop.len() { + 16 => { + let nh_bytes: [u8; 16] = self.nexthop.try_into().unwrap(); + Some((Ipv6Addr::from(nh_bytes), None)) + } + 32 => { + let global_nh_bytes: [u8; 16] = self.nexthop[0..16].try_into().unwrap(); + let llnh_bytes: [u8; 16] = self.nexthop[16..32].try_into().unwrap(); + Some(( + Ipv6Addr::from(global_nh_bytes), + Some(Ipv6Addr::from(llnh_bytes)), + )) + } + _ => None, + }; + } +} + +impl ReadablePacket for MPReachNLRIPathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, afi) = AddressFamilyIdentifier::from_wire(ctx, buf)?; + let (buf, safi) = SubsequentAddressFamilyIdentifier::from_wire(ctx, buf)?; + let (buf, nexthop): (_, Vec) = + nom::multi::length_value(be_u8, nom::multi::many0(be_u8))(buf)?; + // Reserved field set to 0. + let (buf, _) = be_u8(buf)?; + let (buf, nlris): (_, Vec) = nom::multi::many0(|i| NLRI::from_wire(ctx, i))(buf)?; + + Ok(( + buf, + MPReachNLRIPathAttribute { + afi, + safi, + nexthop, + nlris, + }, + )) + } +} + +impl WritablePacket for MPReachNLRIPathAttribute { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf = vec![0u8; 4]; + byteorder::NetworkEndian::write_u16(&mut buf[0..2], self.afi.into()); + buf[2] = self.safi.into(); + buf[3] = self.nexthop.len() as u8; + buf.extend(&self.nexthop); + // Reserved field set to 0. + buf.push(0); + for nlri in &self.nlris { + buf.extend(nlri.to_wire(ctx)?); + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut ctr: u16 = 0; + ctr += 4; // afi + safi + the (len of nexthop) octet + ctr += self.nexthop.len() as u16; + ctr += 1; // Reserved octet. 
+ for nlri in &self.nlris { + ctr += nlri.wire_len(ctx)?; + } + Ok(ctr) + } +} + +impl fmt::Display for MPReachNLRIPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "MPReachNLRI: afi: {} safi: {}, nexthop: {:?} nlris: [", + self.afi, self.safi, self.nexthop + )?; + for nlri in &self.nlris { + std::fmt::Display::fmt(nlri, f)?; + } + write!(f, "]") + } +} + +/// MPUnreachNLRIPathAttribute represents a MultiProtocol prefix withdrawal. +#[derive(Debug, PartialEq, Eq, Clone, Serialize)] +pub struct MPUnreachNLRIPathAttribute { + pub afi: AddressFamilyIdentifier, + pub safi: SubsequentAddressFamilyIdentifier, + pub nlris: Vec, +} + +impl ReadablePacket for MPUnreachNLRIPathAttribute { + fn from_wire<'a>( + ctx: &ParserContext, + buf: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> { + let (buf, afi) = AddressFamilyIdentifier::from_wire(ctx, buf)?; + let (buf, safi) = SubsequentAddressFamilyIdentifier::from_wire(ctx, buf)?; + let (buf, nlris): (_, Vec) = nom::multi::many0(|i| NLRI::from_wire(ctx, i))(buf)?; + Ok((buf, MPUnreachNLRIPathAttribute { afi, safi, nlris })) + } +} + +impl WritablePacket for MPUnreachNLRIPathAttribute { + fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str> { + let mut buf = vec![0u8; 3]; + NetworkEndian::write_u16(&mut buf[0..2], self.afi.into()); + buf[2] = self.safi.into(); + for nlri in &self.nlris { + buf.extend(nlri.to_wire(ctx)?); + } + Ok(buf) + } + fn wire_len(&self, ctx: &ParserContext) -> Result { + let mut ctr: u16 = 0; + ctr += 3; + for nlri in &self.nlris { + ctr += nlri.wire_len(ctx)?; + } + Ok(ctr) + } +} + +impl fmt::Display for MPUnreachNLRIPathAttribute { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "MPUnreachNLRI: afi: {} safi: {}, nlris: [", + self.afi, self.safi + )?; + for nlri in &self.nlris { + std::fmt::Display::fmt(nlri, f)?; + } + write!(f, "]") + } +} + +#[cfg(test)] +mod tests { + use std::net::Ipv4Addr; + + use 
crate::bgp_packet::constants::AddressFamilyIdentifier::Ipv6; + use crate::bgp_packet::constants::SubsequentAddressFamilyIdentifier::Unicast; + use crate::bgp_packet::traits::ParserContext; + use crate::bgp_packet::traits::ReadablePacket; + use crate::bgp_packet::traits::WritablePacket; + + use super::ASPathAttribute; + use super::CommunitiesPathAttribute; + use super::LargeCommunitiesPathAttribute; + use super::LocalPrefPathAttribute; + use super::MPReachNLRIPathAttribute; + use super::MPUnreachNLRIPathAttribute; + use super::MultiExitDiscPathAttribute; + use super::NextHopPathAttribute; + use super::PathAttribute; + + #[test] + fn test_as_path_segment() { + let as_path_bytes = &[ + 0x02, 0x04, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x20, 0x1a, + 0x00, 0x00, 0x78, 0xfc, + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = &ASPathAttribute::from_wire(ctx, as_path_bytes).unwrap(); + + let expected_aspath: Vec = vec![39540, 57118, 8218, 30972]; + + assert_eq!(result.1.segments.len(), 1); + assert!(result.1.segments[0].ordered); + assert_eq!(result.1.segments[0].path, expected_aspath); + + let wire = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, as_path_bytes); + } + + #[test] + fn test_as_path_multi_segment() { + let as_path_bytes = &[ + 0x02, 0x04, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x20, 0x1a, + 0x00, 0x00, 0x78, 0xfc, 0x01, 0x02, 0x00, 0x00, 0x9a, 0x74, 0x00, 0x00, 0xdf, 0x1e, + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = &ASPathAttribute::from_wire(ctx, as_path_bytes).unwrap(); + + let expected_aspath: Vec = vec![39540, 57118, 8218, 30972]; + let expected_asset: Vec = vec![39540, 57118]; + + assert_eq!(result.1.segments.len(), 2); + assert!(result.1.segments[0].ordered); + assert_eq!(result.1.segments[0].path, expected_aspath); + assert!(!result.1.segments[1].ordered); + assert_eq!(result.1.segments[1].path, expected_asset); + + 
let wire = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, as_path_bytes); + } + + #[test] + fn test_next_hop_path_attribute() { + let nh_bytes: &[u8] = &[192, 168, 1, 1]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = NextHopPathAttribute::from_wire(ctx, nh_bytes).unwrap(); + + assert_eq!(result.1 .0, "192.168.1.1".parse::().unwrap()); + let wire = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, nh_bytes); + assert_eq!(result.1.wire_len(ctx).unwrap(), wire.len() as u16); + } + + #[test] + fn test_multi_exit_discriminator_path_attribute() { + let med_bytes: &[u8] = &[0xca, 0x00, 0x00, 0xbe]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = MultiExitDiscPathAttribute::from_wire(ctx, med_bytes).unwrap(); + + assert_eq!(result.1 .0, 3388997822); + let wire = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, med_bytes); + assert_eq!(result.1.wire_len(ctx).unwrap(), wire.len() as u16); + } + + #[test] + fn test_local_pref_path_attribute() { + let local_pref_bytes: &[u8] = &[0xca, 0x00, 0x00, 0xbe]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = LocalPrefPathAttribute::from_wire(ctx, local_pref_bytes).unwrap(); + + assert_eq!(result.1 .0, 3388997822); + let wire = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, local_pref_bytes); + assert_eq!(result.1.wire_len(ctx).unwrap(), wire.len() as u16); + } + + #[test] + fn test_communities_path_attribute() { + let communities_bytes: &[u8] = &[ + 0x00, 0x00, 0x32, 0xbd, 0x00, 0x00, 0x41, 0x5f, 0x32, 0xe6, 0x00, 0x01, 0x32, 0xe6, + 0x10, 0x73, 0x32, 0xe6, 0xca, 0xbd, 0x57, 0x54, 0x0b, 0xb8, 0x57, 0x54, 0x0b, 0xb9, + 0x57, 0x54, 0x2b, 0x5c, 0x57, 0x54, 0xff, 0xe6, 0x57, 0x54, 0xff, 0xf1, 0x6f, 0xf7, + 0xff, 0xf1, 0x73, 0xfb, 0x0f, 0xa0, 0x73, 0xfb, 0x0f, 0xc8, 0x9a, 0x74, 0x0f, 0xa0, + 0x9a, 0x74, 0x0f, 0xb4, 0xdf, 0x1e, 0x07, 0xd0, 0xdf, 0x1e, 0x07, 0xe4, + ]; + let ctx = 
&ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = CommunitiesPathAttribute::from_wire(ctx, communities_bytes).unwrap(); + let expected_communities: Vec<(u16, u16)> = vec![ + (0, 0x32bd), + (0, 0x415f), + (13030, 1), + (13030, 4211), + (13030, 51901), + (22356, 3000), + (22356, 3001), + (22356, 11100), + (22356, 65510), + (22356, 65521), + (28663, 65521), + (29691, 4000), + (29691, 4040), + (39540, 4000), + (39540, 4020), + (57118, 2000), + (57118, 2020), + ]; + assert_eq!(result.1.values.len(), expected_communities.len()); + for (i, community) in result.1.values.iter().enumerate() { + assert_eq!(community.asn, expected_communities[i].0); + assert_eq!(community.payload, expected_communities[i].1); + } + let wire: Vec = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, communities_bytes); + assert_eq!(wire.len() as u16, result.1.wire_len(ctx).unwrap()); + } + + #[test] + fn test_large_communities_path_attribute() { + let large_community_bytes: &[u8] = &[ + 0x00, 0x00, 0xdf, 0x1e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xdf, 0x1e, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result = LargeCommunitiesPathAttribute::from_wire(ctx, large_community_bytes).unwrap(); + assert_eq!(result.1.values.len(), 2); + assert_eq!(result.1.values[0].global_admin, 57118); + assert_eq!(result.1.values[0].ld1, 20); + assert_eq!(result.1.values[0].ld2, 0); + assert_eq!(result.1.values[1].global_admin, 57118); + assert_eq!(result.1.values[1].ld1, 20); + assert_eq!(result.1.values[1].ld2, 20); + + let wire: Vec = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire, large_community_bytes); + assert_eq!(wire.len() as u16, result.1.wire_len(ctx).unwrap()); + } + + #[test] + fn test_mp_reach_nlri_path_attribute() { + let mp_reach_bytes: &[u8] = &[ + 0x00, 0x02, // IPv6 + 0x01, // Unicast + 0x10, // Length of IPv6 nexthop + 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x00, 0x00, 
0x00, // nh addr part one + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, // nh addr part two + 0x00, // Reserved + 0x20, 0x20, 0x01, 0x0d, 0xb8, // NLRI 1 + 0x10, 0xfe, 0x80, // NLRI 2 + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result: (&[u8], MPReachNLRIPathAttribute) = + MPReachNLRIPathAttribute::from_wire(ctx, mp_reach_bytes).unwrap(); + assert_eq!(result.1.afi, Ipv6); + assert_eq!(result.1.safi, Unicast); + assert_eq!( + result.1.nexthop, + vec![ + 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x00, 0x00, 0x00, // nh addr part one + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, // nh addr part two + ] + ); + assert_eq!(result.1.nlris.len(), 2); + assert_eq!(format!("{}", result.1.nlris[0]), "2001:db8::/32"); + assert_eq!(format!("{}", result.1.nlris[1]), "fe80::/16"); + assert_eq!(result.0.len(), 0); + + let wire: Vec = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire.as_slice(), mp_reach_bytes); + assert_eq!(result.1.wire_len(ctx).unwrap() as usize, wire.len()); + } + + #[test] + fn test_mp_unreach_nlri_path_attribute() { + let mp_unreach_bytes: &[u8] = &[ + 0x00, 0x02, // IPv6 + 0x01, // Unicast + 0x20, 0x20, 0x01, 0x0d, 0xb8, // NLRI 1 + 0x10, 0xfe, 0x80, // NLRI 2 + ]; + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let result: (&[u8], MPUnreachNLRIPathAttribute) = + MPUnreachNLRIPathAttribute::from_wire(ctx, mp_unreach_bytes).unwrap(); + assert_eq!(result.1.afi, Ipv6); + assert_eq!(result.1.safi, Unicast); + assert_eq!(result.1.nlris.len(), 2); + assert_eq!(format!("{}", result.1.nlris[0]), "2001:db8::/32"); + assert_eq!(format!("{}", result.1.nlris[1]), "fe80::/16"); + assert_eq!(result.0.len(), 0); + + let wire: Vec = result.1.to_wire(ctx).unwrap(); + assert_eq!(wire.as_slice(), mp_unreach_bytes); + assert_eq!(result.1.wire_len(ctx).unwrap() as usize, wire.len()); + } + + // Tests the high level dispatching of the path attribute parser + #[test] + fn test_path_attribute_parsing<'a>() { + let 
path_attr_bytes: &[u8] = &[ + 0x40, 0x01, 0x01, 0x00, 0x50, 0x02, 0x00, 0x1a, 0x02, 0x06, 0x00, 0x00, 0x9a, 0x74, + 0x00, 0x00, 0x62, 0x03, 0x00, 0x00, 0x0b, 0x62, 0x00, 0x00, 0x19, 0x35, 0x00, 0x00, + 0x20, 0x9a, 0x00, 0x00, 0x34, 0x17, 0x40, 0x03, 0x04, 0xb9, 0x5f, 0xdb, 0x24, 0xc0, + 0x08, 0x2c, 0x0b, 0x62, 0x01, 0xa4, 0x0b, 0x62, 0x04, 0xbf, 0x0b, 0x62, 0x08, 0xa6, + 0x0b, 0x62, 0x0c, 0x80, 0x19, 0x35, 0x07, 0xd0, 0x19, 0x35, 0x09, 0xc4, 0x19, 0x35, + 0x09, 0xcf, 0x62, 0x03, 0x0b, 0x62, 0x62, 0x03, 0x2f, 0x69, 0x9a, 0x74, 0x0f, 0xa0, + 0x9a, 0x74, 0x0f, 0xbe, + ]; + + let ctx = &ParserContext::new().four_octet_asn(true).nlri_mode(Ipv6); + let (buf, res): (_, Vec) = + nom::multi::many0(|buf: &'a [u8]| PathAttribute::from_wire(ctx, buf))(path_attr_bytes) + .unwrap(); + assert_eq!(buf.len(), 0); + let expected_str = "[OriginPathAttribute(OriginPathAttribute(0)), \ + ASPathAttribute(ASPathAttribute { segments: \ + [ASPathSegment { ordered: true, path: [39540, 25091, 2914, 6453, 8346, 13335] }] }), \ + NextHopPathAttribute(NextHopPathAttribute(185.95.219.36)), \ + CommunitiesPathAttribute(CommunitiesPathAttribute { values: \ + [CommunitiesPayload { asn: 2914, payload: 420 }, \ + CommunitiesPayload { asn: 2914, payload: 1215 }, \ + CommunitiesPayload { asn: 2914, payload: 2214 }, \ + CommunitiesPayload { asn: 2914, payload: 3200 }, \ + CommunitiesPayload { asn: 6453, payload: 2000 }, \ + CommunitiesPayload { asn: 6453, payload: 2500 }, \ + CommunitiesPayload { asn: 6453, payload: 2511 }, \ + CommunitiesPayload { asn: 25091, payload: 2914 }, \ + CommunitiesPayload { asn: 25091, payload: 12137 }, \ + CommunitiesPayload { asn: 39540, payload: 4000 }, \ + CommunitiesPayload { asn: 39540, payload: 4030 }] })]"; + assert_eq!(format!("{:?}", res), expected_str); + } +} diff --git a/bgpd/src/bgp_packet/traits.rs b/bgpd/src/bgp_packet/traits.rs new file mode 100644 index 0000000..f1a31ff --- /dev/null +++ b/bgpd/src/bgp_packet/traits.rs @@ -0,0 +1,81 @@ +// Copyright 2021 
Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Implements high level abstractions for use in the BGP parser. + +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use nom::error::ErrorKind; +use nom::error::ParseError; +use nom::IResult; + +// ParserContext contains information pertinent to configurations which affect +// how message parsing is to be handled. +#[derive(Debug, PartialEq, Clone)] +pub struct ParserContext { + // Whether the peer is RFC6793 compliant. + pub four_octet_asn: Option, + // nlri_mode specifies if a parsed NLRI prefix should be a IPv4 or IPv6 address. + pub nlri_mode: Option, +} + +impl ParserContext { + pub fn new() -> ParserContext { + ParserContext { + four_octet_asn: None, + nlri_mode: None, + } + } + + pub fn four_octet_asn(mut self, v: bool) -> Self { + self.four_octet_asn = Some(v); + self + } + + pub fn nlri_mode(mut self, v: AddressFamilyIdentifier) -> Self { + self.nlri_mode = Some(v); + self + } +} + +// Custom error type for the parser. +#[derive(Debug, PartialEq)] +pub enum BGPParserError { + CustomText(String), + Nom(I, ErrorKind), +} + +impl ParseError for BGPParserError { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + BGPParserError::Nom(input, kind) + } + fn append(_: I, _: ErrorKind, other: Self) -> Self { + other + } +} + +pub trait WritablePacket { + /// to_wire serializes the packet to the wire format bytes. 
+ fn to_wire(&self, ctx: &ParserContext) -> Result, &'static str>; + /// wire_len is the length of the message in bytes as would be on the wire. + fn wire_len(&self, ctx: &ParserContext) -> Result; +} + +pub trait ReadablePacket { + fn from_wire<'a>( + ctx: &ParserContext, + i: &'a [u8], + ) -> IResult<&'a [u8], Self, BGPParserError<&'a [u8]>> + where + Self: Sized; +} diff --git a/bgpd/src/lib.rs b/bgpd/src/lib.rs new file mode 100644 index 0000000..a393cbc --- /dev/null +++ b/bgpd/src/lib.rs @@ -0,0 +1,3 @@ +pub mod bgp_packet; +pub mod route_client; +pub mod server; diff --git a/bgpd/src/main.rs b/bgpd/src/main.rs new file mode 100644 index 0000000..1157280 --- /dev/null +++ b/bgpd/src/main.rs @@ -0,0 +1,93 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use bgpd::server::config::ServerConfig; +use bgpd::server::bgp_server::Server; +use clap::{App, Arg}; +use core::sync::atomic::AtomicBool; +use libc::SIGUSR1; +use signal_hook::consts::signal::*; +use signal_hook::consts::TERM_SIGNALS; +use signal_hook::flag; +use signal_hook::iterator::exfiltrator::WithOrigin; +use signal_hook::iterator::SignalsInfo; +use std::fs::File; +use std::io::BufReader; +use std::process::exit; +use std::sync::Arc; +use tracing::info; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let subscriber = tracing_subscriber::fmt().with_env_filter("bgpd=info"); + + match subscriber.try_init() { + Ok(()) => {} + Err(e) => { + eprintln!("Failed to initialize logger: {:?}", e); + exit(1); + } + } + + let argv_matches = App::new("bgpd") + .author("Rayhaan Jaufeerally ") + .version("0.1") + .about("net-control-plane BGP daemon") + .arg(Arg::with_name("config").takes_value(true)) + .get_matches(); + + info!("Starting BGP Daemon!"); + + let config_file = File::open(argv_matches.value_of("config").unwrap_or("config.json")).unwrap(); + let reader = BufReader::new(config_file); + let server_config: ServerConfig = serde_json::from_reader(reader).unwrap(); + + info!("Parsed server config"); + + let mut bgp_server = Server::new(server_config); + bgp_server.start(true).await.unwrap(); + + // The following signal handling code is from: + // https://docs.rs/signal-hook/0.3.10/signal_hook/ + // Comments removed for brevity. + let term_now = Arc::new(AtomicBool::new(false)); + for sig in TERM_SIGNALS { + flag::register_conditional_shutdown(*sig, 1, Arc::clone(&term_now))?; + flag::register(*sig, Arc::clone(&term_now))?; + } + + let mut sigs = vec![SIGHUP, SIGUSR1]; + sigs.extend(TERM_SIGNALS); + let mut signals = SignalsInfo::::new(&sigs)?; + + for info in &mut signals { + match info.signal { + // TODO: Implement something on receiving SIGHUP / SIGUSR1. 
+ SIGHUP => { + println!("Caught SIGHUP, not doing anything"); + } + SIGUSR1 => { + println!("Caught SIGUSR1, not doing anything"); + } + _term_sig => { + eprintln!("Shutting down app"); + break; + } + } + } + + bgp_server.shutdown().await; + + Ok(()) +} diff --git a/bgpd/src/route_client/fib_state.rs b/bgpd/src/route_client/fib_state.rs new file mode 100644 index 0000000..7d41f9b --- /dev/null +++ b/bgpd/src/route_client/fib_state.rs @@ -0,0 +1,178 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use crate::bgp_packet::nlri::NLRI; +use crate::route_client::southbound_interface::SouthboundInterface; +use futures::lock::Mutex; +use ip_network_table_deps_treebitmap::address::Address; +use ip_network_table_deps_treebitmap::IpLookupTable; +use std::convert::{TryFrom, TryInto}; +use std::fmt::Formatter; +use std::net::Ipv6Addr; +use std::net::{IpAddr, Ipv4Addr}; +use std::sync::Arc; +use tracing::{trace, warn}; + +/// fib_state implements the logic to maintain forwarding routes in the FIB. +/// This for now means the Linux Kernel via Netlink, but in the future can +/// be extended to include other targets such as OpenFlow or even program +/// a router using BGP. 
+ +#[derive(Debug)] +pub struct FibEntry { + nexthop: IpAddr, +} + +pub struct FibState { + pub fib: IpLookupTable>>, + pub southbound: S, + pub af: AddressFamilyIdentifier, + pub table: u32, +} + +impl std::fmt::Debug for FibState { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(f, "FibState af: {:?}, table: {}", self.af, self.table) + } +} + +/// to_octets provides an interface for accessing an address as a vector of bytes. +/// This is implemented for IPv4Addr and IPv6Addr to be able to use them interchangably +/// to send updates to the kernel. +pub trait ToOctets { + fn octets(&self) -> Vec; +} + +impl ToOctets for Ipv4Addr { + fn octets(&self) -> Vec { + self.octets().into() + } +} + +impl ToOctets for Ipv6Addr { + fn octets(&self) -> Vec { + self.octets().into() + } +} + +impl< + A: Address + + std::convert::TryFrom + + ToOctets + + std::cmp::PartialEq + + std::fmt::Display + + std::fmt::Debug, + S: SouthboundInterface, + > FibState +where + String: From<>::Error>, +{ + /// route_add requests updating the nexthop to a particular path if it is not already + /// the best path. + pub async fn route_add(&mut self, nlri: &NLRI, nexthop: IpAddr) -> Result<(), String> { + // Lookup the path in the Fib, there are three possible outcomes: + // 1. The route is not yet known, we add it to the FibState and inject it into the kernel, + // 2. The route is known and has a prior nexthop that needs to be updated + // 3. The route is known and has the same nexthop: no-op. + let prefix_addr: A = nlri.clone().try_into()?; + match self + .fib + .exact_match(prefix_addr, nlri.prefixlen.into()) + .as_mut() + { + Some(entry_wrapped) => { + let mut entry = entry_wrapped.lock().await; + if entry.nexthop == nexthop { + // Nothing to do, route already in kernel. 
+ trace!("Skipping route that already exists in kernel"); + } else { + // Remove old route + trace!("Remove old route: {:?}", entry); + if let Err(e) = self.southbound.route_del(nlri.clone(), entry.nexthop).await { + warn!( + "Southbound interface returned error when trying to remove route: {} via {}, error: {}", + nlri, entry.nexthop, e + ); + return Err("Netlink remove error".to_string()); + } + + // Add new route + trace!( + "Add new route: prefix: {:?}, nexthop: {}", + nlri.prefix, + nexthop + ); + if let Err(e) = self + .southbound + .route_add(self.af, nlri.clone(), nexthop) + .await + { + warn!( + "Netlink returned error when trying to add route: {} via {}, error: {}", + nlri, nexthop, e + ); + return Err("Netlink add error".to_string()); + } + + entry.nexthop = nexthop; + } + } + None => { + // Need to insert a new entry for this route + let entry = FibEntry { + nexthop: nexthop.clone(), + }; + + if let Err(e) = self + .southbound + .route_add(self.af, nlri.clone(), nexthop) + .await + { + warn!( + "Netlink returned error when trying to add route: {} via {}, error: {}", + nlri, nexthop, e + ); + return Err("Netlink add error".to_string()); + } + + let addr: A = nlri.clone().try_into()?; + self.fib + .insert(addr, nlri.prefixlen.into(), Arc::new(Mutex::new(entry))); + } + }; + Ok(()) + } + + /// route_del removes a route from the FibState and kernel. 
+ pub async fn route_del(&mut self, nlri: NLRI) -> Result<(), String> { + let prefix_addr: A = nlri.clone().try_into()?; + if let Some(entry_wrapped) = self.fib.exact_match(prefix_addr, nlri.prefixlen.into()) { + { + let entry = entry_wrapped.lock().await; + if let Err(e) = self.southbound.route_del(nlri.clone(), entry.nexthop).await { + warn!( + "Failed to apply route mutation to remove NLRI: {}, error: {}", + nlri, e + ); + } + } + self.fib.remove(prefix_addr, nlri.prefixlen.into()); + } else { + warn!("Failed to find prefix to remove from FIB: {}", nlri); + } + + Ok(()) + } +} diff --git a/bgpd/src/route_client/main.rs b/bgpd/src/route_client/main.rs new file mode 100644 index 0000000..bd561d3 --- /dev/null +++ b/bgpd/src/route_client/main.rs @@ -0,0 +1,273 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use bgpd::route_client::netlink::NetlinkConnector; +use bgpd::route_client::southbound_interface::DummyVerifier; +use bgpd::route_client::southbound_interface::SouthboundInterface; +use clap::Parser; +use log::trace; +use std::convert::TryInto; +use std::net::IpAddr; +use std::net::Ipv4Addr; + +use std::net::Ipv6Addr; +use std::str::FromStr; +use std::time::Duration; +use tonic::transport::Uri; + +use bgpd::bgp_packet::constants::AddressFamilyIdentifier; +use bgpd::bgp_packet::nlri::NLRI; +use bgpd::route_client::fib_state::FibState; + +use ip_network_table_deps_treebitmap::IpLookupTable; +use tonic::transport::Endpoint; +use tracing::{info, warn}; + +use anyhow::{anyhow, Result}; + +use crate::proto::route_service_client::RouteServiceClient; + +pub mod proto { + tonic::include_proto!("bgpd.grpc"); +} + +fn vec_to_array(v: Vec) -> Result<[T; N], anyhow::Error> { + v.try_into() + .map_err(|_| anyhow::Error::msg("Wrong size of Vec".to_string())) +} + +/// Temporary hack to select the route to install to the FIB. +/// TODO: Implement proper route selection logic. +fn select_best_route(ps: &proto::PathSet) -> Option { + let mut selected: Option = None; + for path in &ps.paths { + if let Some(current) = selected.as_ref() { + if path.local_pref < current.local_pref { + selected = Some(path.clone()); + } + } else { + selected = Some(path.clone()); + } + } + selected +} + +async fn run_connector_v4( + route_server: String, + rt_table: u32, + dry_run: bool, + southbound: S, +) -> Result<(), anyhow::Error> { + // Create netlink socket. 
+ let mut fib_state = FibState:: { + fib: IpLookupTable::new(), + southbound, + af: AddressFamilyIdentifier::Ipv4, + table: rt_table, + }; + + let uri = Uri::from_str(route_server.as_str()).unwrap(); + let endpoint = Endpoint::from(uri).keep_alive_timeout(Duration::from_secs(10)); + let mut client = RouteServiceClient::connect(endpoint).await?; + let request = proto::StreamPathsRequest { + address_family: proto::AddressFamily::IPv4.into(), + }; + + let mut stream = client.stream_paths(request).await?.into_inner(); + let mut msg_ctr: u64 = 0; + while let Some(route) = stream.message().await? { + let nlri = NLRI { + afi: AddressFamilyIdentifier::Ipv4, + prefixlen: route.prefix.as_ref().unwrap().prefix_len as u8, + prefix: route.prefix.as_ref().unwrap().ip_prefix.clone(), + }; + + trace!("IPv4 Update {} for: {} ", msg_ctr, nlri); + msg_ctr += 1; + + if !dry_run { + if !route.paths.is_empty() { + if let Some(best) = select_best_route(&route) { + // Hack to convert the nexthop into a v4 addr + let nh_bytes: [u8; 4] = vec_to_array(best.nexthop.clone())?; + let nh_addr: Ipv4Addr = Ipv4Addr::from(nh_bytes); + if let Err(e) = fib_state.route_add(&nlri, IpAddr::V4(nh_addr)).await { + return Err(anyhow!("Failed to add route {}: {}", nlri, e)); + } + } + } else { + // No more paths, delete + if let Err(e) = fib_state.route_del(nlri).await { + return Err(anyhow!("Failed to delete route: {}", e)); + } + } + } + + trace!("Number of paths: {}", route.paths.len()); + for path in &route.paths { + // TODO: have a proper error here not unwrap. 
+ let nexthop_bytes: [u8; 4] = path.nexthop.clone().try_into().unwrap(); + let nexthop: Ipv4Addr = nexthop_bytes.into(); + trace!( + "nexthop: {}, peer: {}, local_pref: {}, med: {}, as_path: {:?}", + nexthop, + path.peer_name, + path.local_pref, + path.med, + path.as_path + ); + } + } + + unreachable!() +} + +async fn run_connector_v6( + route_server: String, + rt_table: u32, + dry_run: bool, + southbound: S, +) -> Result<()> { + let mut fib_state = FibState:: { + fib: IpLookupTable::new(), + southbound, + af: AddressFamilyIdentifier::Ipv6, + table: rt_table, + }; + + let uri = Uri::from_str(route_server.as_str()).unwrap(); + let endpoint = Endpoint::from(uri).keep_alive_timeout(Duration::from_secs(10)); + let mut client = RouteServiceClient::connect(endpoint).await?; + let request = proto::StreamPathsRequest { + address_family: proto::AddressFamily::IPv6.into(), + }; + info!("Request: {:?}", request); + + let mut stream = client.stream_paths(request).await?.into_inner(); + let mut msg_ctr: u64 = 0; + while let Some(route) = stream.message().await? 
{ + let nlri = NLRI { + afi: AddressFamilyIdentifier::Ipv6, + prefixlen: route.prefix.as_ref().unwrap().prefix_len as u8, + prefix: route.prefix.as_ref().unwrap().ip_prefix.clone(), + }; + + trace!("IPv6 Update {} for: {} ", msg_ctr, nlri); + msg_ctr += 1; + + if !dry_run { + if !route.paths.is_empty() { + if let Some(best) = select_best_route(&route) { + // Hack to convert the nexthop into a v6 addr + let nh_bytes: [u8; 16] = vec_to_array(best.nexthop.clone())?; + let nh_addr: Ipv6Addr = Ipv6Addr::from(nh_bytes); + if let Err(e) = fib_state.route_add(&nlri, IpAddr::V6(nh_addr)).await { + return Err(anyhow!("Failed to add route {}: {}", nlri, e)); + } + } + } else { + // No more paths, delete + if let Err(e) = fib_state.route_del(nlri).await { + return Err(anyhow!("Failed to delete route: {}", e)); + } + } + } + + trace!("Number of paths: {}", route.paths.len()); + for path in &route.paths { + // TODO: have a proper error here not unwrap. + let nexthop_bytes: [u8; 16] = path.nexthop.clone().try_into().unwrap(); + let nexthop: Ipv6Addr = nexthop_bytes.into(); + trace!( + "nexthop: {}, peer: {}, local_pref: {}, med: {}, as_path: {:?}", + nexthop, + path.peer_name, + path.local_pref, + path.med, + path.as_path + ); + } + } + + unreachable!() +} + +#[derive(Parser)] +#[clap( + author = "Rayhaan Jaufeerally ", + version = "0.1", + about = "Installs routes from a BGP speaker via streaming RPC to the forwarding plane" +)] +struct Cli { + #[clap(long = "route_server")] + route_server: String, + #[clap(long = "rt_table")] + rt_table: Option, + dry_run: bool, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Cli::parse(); + + let _init_log = stderrlog::new() + .verbosity(2) // Shows info level. 
+ .show_module_names(true) + .init(); + info!("Starting route client"); + + let rt_table = match args.rt_table { + Some(table) => table, + None => 201, + }; + + let v4_joinhandle = { + let server_addr = args.route_server.clone(); + tokio::task::spawn(async move { + run_connector_v4::( + server_addr.clone(), + rt_table, + args.dry_run, + NetlinkConnector::new(Some(rt_table)).await.unwrap(), + ) + .await + .unwrap(); + }) + }; + + let v6_joinhandle = { + let server_addr = args.route_server.clone(); + tokio::task::spawn(async move { + run_connector_v6::( + server_addr, + rt_table, + args.dry_run, + NetlinkConnector::new(Some(rt_table)).await.unwrap(), + ) + .await + .unwrap(); + }) + }; + + tokio::select! { + _ = v4_joinhandle => { + warn!("Unexpected exit of IPv4 connector"); + }, + _ = v6_joinhandle => { + warn!("Unexpected exit of IPv6 connector"); + } + } + + Ok(()) +} diff --git a/bgpd/src/route_client/mod.rs b/bgpd/src/route_client/mod.rs new file mode 100644 index 0000000..1b6de0b --- /dev/null +++ b/bgpd/src/route_client/mod.rs @@ -0,0 +1,17 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +pub mod fib_state; +pub mod netlink; +pub mod southbound_interface; diff --git a/bgpd/src/route_client/netlink.rs b/bgpd/src/route_client/netlink.rs new file mode 100644 index 0000000..3cec408 --- /dev/null +++ b/bgpd/src/route_client/netlink.rs @@ -0,0 +1,173 @@ +use crate::bgp_packet::{constants::AddressFamilyIdentifier, nlri::NLRI}; +use anyhow::Result; +use async_trait::async_trait; +use futures::TryStreamExt; +use netlink::constants::RTN_UNICAST; +use netlink_packet_route::{rtnl::route::nlas::Nla, RouteHeader}; +use netlink_packet_route::{RouteMessage, RTPROT_STATIC}; +use rtnetlink::IpVersion; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::{convert::TryInto, io::ErrorKind}; + +use super::southbound_interface::SouthboundInterface; + +/// NetlinkConnector implements methods to read/update Linux networking stuff including +/// routes and link level info. +pub struct NetlinkConnector { + handle: rtnetlink::Handle, + table: Option, +} + +#[async_trait] +impl SouthboundInterface for NetlinkConnector { + async fn route_add( + &mut self, + address_family: AddressFamilyIdentifier, + prefix: NLRI, + nexthop: IpAddr, + ) -> Result<()> { + let route = self.handle.route(); + match address_family { + AddressFamilyIdentifier::Ipv6 => { + let addr: Ipv6Addr = match prefix.clone().try_into()? { + IpAddr::V6(addr) => addr, + _ => { + return Err(anyhow::Error::from(std::io::Error::new( + ErrorKind::InvalidInput, + "Got non-IPv6 address from NLRI", + ))) + } + }; + let gw_addr: Ipv6Addr = match nexthop.clone().try_into()? 
{ + IpAddr::V6(addr) => addr, + _ => { + return Err(anyhow::Error::from(std::io::Error::new( + ErrorKind::InvalidInput, + "Got non-IPv6 gateway for IPv6 NLRI", + ))) + } + }; + let mut mutation = route + .add() + .v6() + .destination_prefix(addr, prefix.prefixlen) + .gateway(gw_addr); + if let Some(table_id) = self.table { + mutation = mutation.table(table_id.try_into().unwrap()); + } + mutation.execute().await.map_err(|e| anyhow::Error::from(e)) + } + AddressFamilyIdentifier::Ipv4 => { + let addr: Ipv4Addr = match prefix.clone().try_into()? { + IpAddr::V4(addr) => addr, + _ => { + return Err(anyhow::Error::from(std::io::Error::new( + ErrorKind::InvalidInput, + "Got non-IPv4 address from NLRI", + ))) + } + }; + let gw_addr = match nexthop.clone().try_into()? { + IpAddr::V4(addr) => addr, + _ => { + return Err(anyhow::Error::from(std::io::Error::new( + ErrorKind::InvalidInput, + "Got non-IPv4 gateway for IPv4 NLRI", + ))) + } + }; + let mut mutation = route + .add() + .v4() + .destination_prefix(addr, prefix.prefixlen) + .gateway(gw_addr); + if let Some(table_id) = self.table { + mutation = mutation.table(table_id.try_into().unwrap()); + } + mutation.execute().await.map_err(|e| anyhow::Error::from(e)) + } + } + } + + async fn route_del(&mut self, prefix: NLRI, nexthop: IpAddr) -> Result<()> { + let nh_octets = match nexthop { + IpAddr::V6(addr) => addr.octets().to_vec(), + IpAddr::V4(addr) => addr.octets().to_vec(), + }; + let rt_handle = self.handle.route(); + let address_family = match prefix.afi { + AddressFamilyIdentifier::Ipv4 => netlink_packet_route::rtnl::constants::AF_INET as u8, + AddressFamilyIdentifier::Ipv6 => netlink_packet_route::rtnl::constants::AF_INET6 as u8, + }; + let header = RouteHeader { + address_family, + destination_prefix_length: prefix.prefixlen, + table: self.table.unwrap_or(0) as u8, + protocol: RTPROT_STATIC, + kind: RTN_UNICAST, + ..Default::default() + }; + let mut rt_msg = RouteMessage { + header, + ..Default::default() + }; + let 
prefix_octets = match prefix.afi { + AddressFamilyIdentifier::Ipv4 => { + let addr: Ipv4Addr = match prefix.clone().try_into()? { + IpAddr::V4(addr) => addr, + _ => { + return Err(anyhow::Error::from(std::io::Error::new( + ErrorKind::InvalidInput, + "Got non-IPv4 address from NLRI", + ))) + } + }; + addr.octets().to_vec() + } + AddressFamilyIdentifier::Ipv6 => { + let addr: Ipv6Addr = match prefix.clone().try_into()? { + IpAddr::V6(addr) => addr, + _ => { + return Err(anyhow::Error::from(std::io::Error::new( + ErrorKind::InvalidInput, + "Got non-IPv6 address from NLRI", + ))) + } + }; + addr.octets().to_vec() + } + }; + rt_msg.nlas.push(Nla::Destination(prefix_octets)); + rt_msg.nlas.push(Nla::Gateway(nh_octets)); + rt_handle + .del(rt_msg) + .execute() + .await + .map_err(|e| anyhow::Error::from(e)) + } +} + +impl NetlinkConnector { + pub async fn new(table: Option) -> Result { + let (connection, handle, _) = rtnetlink::new_connection()?; + tokio::spawn(connection); + Ok(NetlinkConnector { handle, table }) + } + + pub async fn dump_routes( + &mut self, + address_family: AddressFamilyIdentifier, + table: Option, + ) -> Result, rtnetlink::Error> { + let mut req = self.handle.route().get(match address_family { + AddressFamilyIdentifier::Ipv4 => IpVersion::V4, + AddressFamilyIdentifier::Ipv6 => IpVersion::V6, + }); + if let Some(table_id) = table { + req.message_mut() + .nlas + .push(Nla::Table(table_id.try_into().unwrap())); + } + req.execute().try_collect().await + } +} diff --git a/bgpd/src/route_client/southbound_interface.rs b/bgpd/src/route_client/southbound_interface.rs new file mode 100644 index 0000000..b93cfa3 --- /dev/null +++ b/bgpd/src/route_client/southbound_interface.rs @@ -0,0 +1,100 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::{collections::HashMap, net::IpAddr}; + +use crate::bgp_packet::{constants::AddressFamilyIdentifier, nlri::NLRI}; +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use log::info; + +/// SouthboundInterface provides a uniform API to network forwarding elements +/// These are devices or targets that perform packet routing and are the end +/// consumers of packet routing data. + +#[async_trait] +pub trait SouthboundInterface { + async fn route_add( + &mut self, + address_family: AddressFamilyIdentifier, + prefix: NLRI, + nexthop: IpAddr, + ) -> Result<()>; + async fn route_del(&mut self, prefix: NLRI, nexthop: IpAddr) -> Result<()>; +} + +/// DummyVerifier is a SouthboundInterface that checks that routes are not added more than +/// once and not removed when there are none. +pub struct DummyVerifier { + route_state: HashMap, +} + +impl std::default::Default for DummyVerifier { + fn default() -> DummyVerifier { + DummyVerifier { + route_state: HashMap::default(), + } + } +} + +#[async_trait] +impl SouthboundInterface for DummyVerifier { + async fn route_add( + &mut self, + _: AddressFamilyIdentifier, + prefix: NLRI, + nexthop: IpAddr, + ) -> Result<()> { + // Check that the route is not already present. + match self.route_state.get(&prefix) { + Some(value) => { + return Err(anyhow!( + "Prefix {} with nexthop {} already contained in route_state! when trying to add {} -> {}", + prefix, value, prefix, nexthop, + )); + } + _ => {} + } + if self.route_state.get(&prefix).is_some() {} + // Insert route into in memory state. 
+ self.route_state.insert(prefix, nexthop); + + info!("Route add ok in verifier ({})", self.route_state.len()); + + Ok(()) + } + + async fn route_del(&mut self, prefix: NLRI, nexthop: IpAddr) -> Result<()> { + match self.route_state.remove(&prefix) { + Some(entry) => { + if entry != nexthop { + return Err(anyhow!( + "Removed entry's nexthop did not match: {} vs requested {}", + entry, + nexthop + )); + } + } + None => { + return Err(anyhow!( + "Requested removal of route {} that was not in route_state", + prefix + )); + } + } + + info!("Route del ok in verifier ({})", self.route_state.len()); + Ok(()) + } +} diff --git a/bgpd/src/server/bgp_server.rs b/bgpd/src/server/bgp_server.rs new file mode 100644 index 0000000..a7720df --- /dev/null +++ b/bgpd/src/server/bgp_server.rs @@ -0,0 +1,527 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::bgp_packet::constants::AddressFamilyIdentifier; +use crate::server::config::PeerConfig; +use crate::server::config::ServerConfig; +use crate::server::peer::PeerCommands; +use crate::server::peer::PeerStateMachine; +use crate::server::rib_manager::RibManager; +use crate::server::rib_manager::RibSnapshot; +use crate::server::rib_manager::RouteManagerCommands; +use crate::server::route_server; +use crate::server::route_server::route_server::route_service_server::RouteServiceServer; +use std::collections::HashMap; +use std::net::Ipv4Addr; +use std::net::Ipv6Addr; +use std::net::SocketAddr; +use tokio::net::TcpListener; +use tokio::net::TcpStream; +use tokio::sync::broadcast; +use tokio::sync::mpsc::unbounded_channel; +use tokio::sync::mpsc::UnboundedSender; +use tokio::sync::oneshot; +use tracing::{info, warn}; +use warp::Filter; +use warp::Reply; + +// socket_listener starts listening on the given address, and passes clients that have +// made an inbound connection to the provided stream. It also implements logic for +// recreating the listener in the event that it fails. +// Notifier is sent the restult of the first attempt to start the listener. +async fn socket_listener( + c: UnboundedSender<(TcpStream, SocketAddr)>, + listen_addr: String, + notifier: oneshot::Sender>, + mut shutdown: broadcast::Receiver<()>, +) { + info!("Starting to listen on addr: {}", listen_addr); + let listener_result = TcpListener::bind(&listen_addr).await; + if let Err(e) = listener_result { + warn!("Listener for {} failed: {}", listen_addr, e.to_string()); + match notifier.send(Err(e.to_string())) { + Ok(_) => {} + Err(e) => warn!("Failed to send notification of channel error: {:?}", e), + } + return; + } + + let listener = listener_result.unwrap(); + match notifier.send(Ok(())) { + Ok(_) => {} + Err(e) => warn!("Failed to send notification of channel ready: {:?}", e), + } + info!("Sucessfully spawned listner for: {}", listen_addr); + loop { + let conn = tokio::select! 
{ + res = listener.accept() => res, + _ = shutdown.recv() => { + info!("Shutting down listener"); + return; + } + }; + info!("Got something: {:?}", conn); + match conn { + Ok((stream, addr)) => { + info!("Accepted socket connection from {}", addr); + match c.send((stream, addr)) { + Ok(_) => {} + Err(e) => { + warn!( + "Dropped connection from {} due to mpsc::channel failure: {}", + addr, e + ); + } + } + } + Err(e) => { + warn!("Failed to accept connection: {}, aborting listener", e); + break; + } + } + } +} + +async fn start_http_server( + manager4: UnboundedSender>, + manager6: UnboundedSender>, + peers: HashMap>, + listen_addr: SocketAddr, + mut shutdown: broadcast::Receiver<()>, +) -> Result, String> { + async fn manager_get_routes_handler( + channel: UnboundedSender>, + ) -> Result { + let (tx, rx) = tokio::sync::oneshot::channel::>(); + if let Err(e) = channel.send(RouteManagerCommands::DumpRib(tx)) { + warn!("Failed to send DumpRib request: {}", e); + return Err(warp::reject()); + } + + match rx.await { + Ok(result) => Ok(warp::reply::json(&result)), + Err(e) => { + warn!("Failed to get RIB from manager: {}", e); + Err(warp::reject()) + } + } + } + + async fn rm_large_community( + chan: UnboundedSender, + ld1: u32, + ld2: u32, + ) -> Result { + let (tx, rx) = tokio::sync::oneshot::channel::(); + if let Err(e) = chan.send(PeerCommands::RemoveLargeCommunity((ld1, ld2), tx)) { + warn!("Failed to send RemoveLargeCommunity request: {}", e); + return Err(warp::reject()); + } + + match rx.await { + Ok(result) => Ok(warp::reply::json(&result)), + Err(e) => { + warn!( + "RemoveLargeCommunity response from peer state machine: {}", + e + ); + Err(warp::reject()) + } + } + } + + async fn add_large_community( + chan: UnboundedSender, + ld1: u32, + ld2: u32, + ) -> Result { + let (tx, rx) = tokio::sync::oneshot::channel::(); + if let Err(e) = chan.send(PeerCommands::AddLargeCommunity((ld1, ld2), tx)) { + warn!("Failed to send AddLargeCommunity request: {}", e); + return 
Err(warp::reject()); + } + + match rx.await { + Ok(result) => Ok(warp::reply::json(&result)), + Err(e) => { + warn!("AddLargeCommunity response from peer state machine: {}", e); + Err(warp::reject()) + } + } + } + + // reset_peer_connection causes the PSM to close the connection, flush state, and reconnect to the peer. + async fn reset_peer_connection( + peer_name: String, + peers: HashMap>, + ) -> Result { + if let Some(peer_sender) = peers.get(&peer_name) { + if let Err(e) = peer_sender.send(PeerCommands::ConnectionClosed()) { + Ok(warp::reply::with_status( + format!("Something went wrong: {}", e), + warp::http::StatusCode::INTERNAL_SERVER_ERROR, + ) + .into_response()) + } else { + Ok(warp::reply::html( + "Sent restart request to PeerStateMachine. Something might happen.", + ) + .into_response()) + } + } else { + Ok( + warp::reply::with_status("No such peer found!", warp::http::StatusCode::NOT_FOUND) + .into_response(), + ) + } + } + + /// peerz is a debugging endpoint for PeerStateMachines on this server. + async fn get_peerz( + peers: HashMap>, + ) -> Result { + let mut result: String = "".to_string(); + for (peer_name, sender) in peers { + result += &format!("

{}


", peer_name); + let (tx, rx) = oneshot::channel(); + match sender.send(PeerCommands::GetStatus(tx)) { + Ok(()) => {} + Err(e) => { + warn!("Failed to send request to PSM channel: {}", e); + return Ok(warp::reply::with_status( + "Something went wrong!", + warp::http::StatusCode::INTERNAL_SERVER_ERROR, + ) + .into_response()); + } + } + match rx.await { + Ok(resp) => { + result += &format!("Peer state: {:?}
", resp.state); + result += &format!("{:?}", resp.config); + } + Err(e) => { + warn!("error on rx from peer channel: {}", e); + return Ok(warp::reply::with_status( + "Something went wrong!", + warp::http::StatusCode::INTERNAL_SERVER_ERROR, + ) + .into_response()); + } + } + } + result += ""; + Ok(warp::http::Response::builder().body(result).into_response()) + } + + /* + async fn modify_community_fn( + add: bool, + peers: HashMap>, + name: String, + ld1: u32, + ld2: u32, + ) -> Result { + if let Some(chan) = peers.get(&name) { + if let Err(e) = func(chan.clone(), ld1, ld2).await { + warn!("Failed to add large community: {:?}", e); + return Err(warp::reject()); + } + } else { + return Err(warp::reject()); + } + Ok(warp::reply::with_status("Ok", warp::http::StatusCode::OK)) + } + + let add_community_filter = warp::post() + .map(move || true) + .and(warp::path::param()) + .and(warp::path!(u32 / u32)) + .and_then(modify_community_fn); + + */ + + // Start the web server that has access to the rib managers so that it can expose the state. 
+ let v4_mgr_filter = warp::any().map(move || manager4.clone()); + + let warp_v4_routes = warp::get() + .and(warp::path("ipv4")) + .and(warp::path("routes")) + .and(warp::path::end()) + .and(v4_mgr_filter) + .and_then(manager_get_routes_handler); + + let v6_mgr_filter = warp::any().map(move || manager6.clone()); + + let warp_v6_routes = warp::get() + .and(warp::path("ipv6")) + .and(warp::path("routes")) + .and(warp::path::end()) + .and(v6_mgr_filter) + .and_then(manager_get_routes_handler); + + let peers_map_filter = warp::any().map(move || peers.clone()); + let peerz_route = warp::get() + .and(warp::path("peerz")) + .and(warp::path::end()) + .and(peers_map_filter.clone()) + .and_then(get_peerz); + + let peers_restart_route = warp::post() + .and(warp::path("peerz")) + .and(warp::path::param()) + .and(warp::path("restart")) + .and(warp::path::end()) + .and(peers_map_filter) + .and_then(reset_peer_connection); + + let routes = warp_v4_routes + .or(warp_v6_routes) + .or(peerz_route) + .or(peers_restart_route); + let (_, server) = warp::serve(routes) + .try_bind_with_graceful_shutdown(listen_addr, async move { + shutdown.recv().await.ok(); + }) + .map_err(|e| e.to_string())?; + Ok(tokio::task::spawn(server)) +} + +/// Server encapsulates the behavior of the BGP speaker. +pub struct Server { + config: ServerConfig, + + // shutdown is a channel that a + shutdown: broadcast::Sender<()>, + + // worker_handles contains the JoinHandle of tasks spawned by the server so that + // we can wait on them for shutdown. + worker_handles: Vec>, + + mgr_v6: Option>>, + mgr_v4: Option>>, +} + +impl Server { + pub fn new(config: ServerConfig) -> Server { + let (shutdown, _) = broadcast::channel(1); + Server { + config, + shutdown, + worker_handles: vec![], + mgr_v4: None, + mgr_v6: None, + } + } + + // start kicks off the BGP server + // wait_startup controls whether this function waits for the listeners to come up healthy + // before returning. 
This is useful in tests and other situations where we want to wait + // and then probe the endpoints. + pub async fn start(&mut self, wait_startup: bool) -> Result<(), String> { + // TODO: the following code spawns a bunch of asynchronous tasks, and it would be + // good to have a handle on the status of these tasks so that we can restart them + // or alert if they crash. + + // Channel for passing newly established TCP streams to the dispatcher. + let (tcp_in_tx, mut tcp_in_rx): (UnboundedSender<(TcpStream, SocketAddr)>, _) = + tokio::sync::mpsc::unbounded_channel(); + + // For every address we are meant to listen on, we spawn a task that will listen on + // that address. This is so that if the listening socket breaks somehow, we can + // periodically retry to listen again. + for listen_addr in self.config.clone().listen_addrs { + info!("Starting listener for {}", listen_addr.to_string()); + let sender = tcp_in_tx.clone(); + let (ready_tx, ready_rx) = oneshot::channel(); + let shutdown_channel = self.shutdown.subscribe(); + let listen_handle = tokio::spawn(async move { + socket_listener(sender, listen_addr.to_string(), ready_tx, shutdown_channel).await; + }); + self.worker_handles.push(listen_handle); + if wait_startup { + let statup_result = ready_rx.await; + match statup_result { + Ok(_) => {} + Err(err) => return Err(format!("Failed to startup listener: {}", err)), + } + } + } + + // Start the route manager for IPv6 and IPv4. 
+ let (rp6_tx, rp6_rx) = unbounded_channel::>(); + self.mgr_v6 = Some(rp6_tx.clone()); + let mut rib_manager6: RibManager = + RibManager::::new(rp6_rx, self.shutdown.subscribe()).unwrap(); + tokio::spawn(async move { + match rib_manager6.run().await { + Ok(_) => {} + Err(e) => { + warn!("RIBManager exited: {}", e); + } + } + }); + + let (rp4_tx, rp4_rx) = unbounded_channel::>(); + self.mgr_v4 = Some(rp4_tx.clone()); + let mut rib_manager4: RibManager = + RibManager::::new(rp4_rx, self.shutdown.subscribe()).unwrap(); + tokio::spawn(async move { + match rib_manager4.run().await { + Ok(_) => {} + Err(e) => { + warn!("RIBManager exited: {}", e); + } + } + }); + + // Start a PeerStateMachine for every peer that is configured and store its channel so that + // we can communicate with it. + + let mut peer_statemachines: HashMap)> = + HashMap::new(); + + for peer_config in &self.config.peers { + let (psm_tx, psm_rx) = unbounded_channel::(); + match peer_config.afi { + AddressFamilyIdentifier::Ipv6 => { + let mut psm = PeerStateMachine::::new( + self.config.clone(), + peer_config.clone(), + psm_rx, + psm_tx.clone(), + rp6_tx.clone(), + self.shutdown.subscribe(), + ); + self.worker_handles.push(tokio::spawn(async move { + psm.run().await; + warn!("Should not reach here"); + })); + } + AddressFamilyIdentifier::Ipv4 => { + let mut psm = PeerStateMachine::::new( + self.config.clone(), + peer_config.clone(), + psm_rx, + psm_tx.clone(), + rp4_tx.clone(), + self.shutdown.subscribe(), + ); + self.worker_handles.push(tokio::spawn(async move { + psm.run().await; + warn!("Should not reach here"); + })); + } + _ => panic!("Unsupported address family: {}", peer_config.afi), + } + + peer_statemachines.insert(peer_config.name.clone(), (peer_config.clone(), psm_tx)); + } + + let mut peer_chan_map: HashMap> = HashMap::new(); + for (k, v) in &peer_statemachines { + peer_chan_map.insert(k.to_string(), v.1.clone()); + } + + // Start the HTTP server for debugging access. 
+ if let Some(http_addr) = &self.config.http_addr { + let addr = http_addr.parse().unwrap(); + start_http_server( + rp4_tx.clone(), + rp6_tx.clone(), + peer_chan_map.clone(), + addr, + self.shutdown.subscribe(), + ) + .await + .unwrap(); + } + + // Start the gRPC server for streaming the RIB. + if let Some(grpc_addr) = &self.config.grpc_addr { + let addr = grpc_addr.parse().unwrap(); + info!("Running gRPC RouteService on {}", addr); + let rs = route_server::RouteServer { + ip4_manager: rp4_tx.clone(), + ip6_manager: rp6_tx.clone(), + peer_state_machines: peer_chan_map, + }; + + let svc = RouteServiceServer::new(rs); + tokio::spawn(async move { + if let Err(e) = tonic::transport::Server::builder() + .add_service(svc) + .serve(addr) + .await + { + warn!("Failed to run gRPC server: {}", e); + } + }); + } + + // Event loop for processing inbound connections. + let mut shutdown_recv = self.shutdown.subscribe(); + self.worker_handles.push(tokio::spawn(async move { + loop { + let next = tokio::select! 
{ + cmd = tcp_in_rx.recv() => cmd, + _ = shutdown_recv.recv() => { + warn!("Peer connection dispatcher shutting down due to shutdown signal."); + return; + } + }; + match next { + Some((socket, addr)) => { + let mut psm_opt: Option> = None; + for (name, handle) in &peer_statemachines { + if handle.0.ip == addr.ip() { + info!("Got connection for peer: {}", name); + psm_opt = Some(handle.1.clone()); + } + } + if let Some(psm) = psm_opt { + psm.send(PeerCommands::NewConnection(socket)).unwrap(); + } else { + info!("Dropping unrecognized connection from {}", addr); + } + } + None => { + warn!("Failed to read incoming connections, exiting"); + break; + } + } + } + })); + + Ok(()) + } + + pub async fn shutdown(&mut self) { + match self.shutdown.send(()) { + Ok(_) => {} + Err(e) => { + warn!("Failed to send shutdown signal: {}", e); + return; + } + } + for handle in &mut self.worker_handles { + match handle.await { + Ok(_) => {} + Err(e) => { + warn!("Failed to shutdown task: {}", e); + } + } + } + } +} diff --git a/bgpd/src/server/config.rs b/bgpd/src/server/config.rs new file mode 100644 index 0000000..648d548 --- /dev/null +++ b/bgpd/src/server/config.rs @@ -0,0 +1,73 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::bgp_packet::constants::{AddressFamilyIdentifier, SubsequentAddressFamilyIdentifier}; +use serde::{Deserialize, Serialize}; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; + +#[derive(Clone, Serialize, Deserialize)] +pub struct ServerConfig { + pub identifier: Ipv4Addr, + pub asn: u32, + pub hold_time: u16, + + // The address to listen on for control plane gRPC connections. + // If unset the gRPC server is not started. + pub grpc_addr: Option, + + // The address to listen on for the debugging HTTP server. + // If unset the HTTP server is not started. + pub http_addr: Option, + + // The addresses to listen on for BGP peers. + pub listen_addrs: Vec, + + pub peers: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PeerConfig { + /// A unique name for this peer. + pub name: String, + + pub ip: IpAddr, + /// Optional port number to communicate with this peer. + pub port: Option, + /// Autonomous system number of the peer. + pub asn: u32, + + pub afi: AddressFamilyIdentifier, + pub safi: SubsequentAddressFamilyIdentifier, + + pub local_pref: u32, + + // Announcements is a hardcoded list of BGP updates to send + // to the peer. + pub announcements: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PrefixAnnouncement { + pub prefix: String, + /// Nexthop to be announced for this prefix. + pub nexthop: IpAddr, + /// Linklocal nexthop to be used for IPv6 announcements. + pub llnh: Option, + + // Path attributes + pub local_pref: Option, + pub med: Option, + pub communities: Option>, + pub large_communities: Option>, +} diff --git a/bgpd/src/server/data_structures.rs b/bgpd/src/server/data_structures.rs new file mode 100644 index 0000000..dff7d0b --- /dev/null +++ b/bgpd/src/server/data_structures.rs @@ -0,0 +1,66 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bgp_packet::nlri::NLRI; +use crate::bgp_packet::path_attributes::PathAttribute; +use std::time::SystemTime; + +/// RouteInfo encapsulates information received about a particular BGP route. +#[derive(Clone, Debug)] +pub struct RouteInfo
{ + pub prefix: A, + pub prefixlen: u8, + pub nlri: NLRI, + + /// accepted is true if the route was accepted. + pub accepted: bool, + + /// rejection_reason contains the reason why a particular route was dropped. + pub rejection_reason: Option, + + /// Time at which this path was learned from the peer. + pub learned: SystemTime, + /// Time at which this path was last updated by the peer. + pub updated: SystemTime, + + /// The current path attributes from the UPDATE message where this path + /// was learned. + pub path_attributes: Vec, +} + +/// RouteUpdate is a type which encapsulates a newly learned, modified, or removed set of prefixes. +#[derive(Debug)] +pub enum RouteUpdate { + Announce(RouteAnnounce), + Withdraw(RouteWithdraw), +} + +#[derive(Debug)] +pub struct RouteAnnounce { + pub peer: String, + pub prefixes: Vec, + + pub local_pref: u32, + pub med: u32, + pub as_path: Vec, + pub nexthop: Vec, + + pub path_attributes: Vec, +} + +#[derive(Debug)] +pub struct RouteWithdraw { + pub peer: String, + pub prefixes: Vec, +} diff --git a/bgpd/src/server/mod.rs b/bgpd/src/server/mod.rs new file mode 100644 index 0000000..87022e8 --- /dev/null +++ b/bgpd/src/server/mod.rs @@ -0,0 +1,20 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +pub mod bgp_server; +pub mod config; +pub mod data_structures; +pub mod peer; +pub mod rib_manager; +pub mod route_server; diff --git a/bgpd/src/server/peer.rs b/bgpd/src/server/peer.rs new file mode 100644 index 0000000..547731a --- /dev/null +++ b/bgpd/src/server/peer.rs @@ -0,0 +1,1337 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bgp_packet::capabilities::{ + BGPCapability, BGPCapabilityTypeValues, BGPCapabilityValue, BGPOpenOptionTypeValues, + FourByteASNCapability, MultiprotocolCapability, OpenOption, OpenOptionCapabilities, + OpenOptions, +}; +use crate::bgp_packet::constants::{ + AddressFamilyIdentifier, SubsequentAddressFamilyIdentifier, AS_TRANS, +}; +use crate::bgp_packet::messages::BGPMessage; +use crate::bgp_packet::messages::BGPMessageTypeValues; +use crate::bgp_packet::messages::BGPMessageTypeValues::OPEN_MESSAGE; +use crate::bgp_packet::messages::BGPMessageTypeValues::UPDATE_MESSAGE; +use crate::bgp_packet::messages::BGPSubmessage; +use crate::bgp_packet::messages::Codec; +use crate::bgp_packet::messages::KeepaliveMessage; +use crate::bgp_packet::messages::NotificationMessage; +use crate::bgp_packet::messages::OpenMessage; +use crate::bgp_packet::messages::UpdateMessage; +use crate::bgp_packet::nlri::NLRI; +use crate::bgp_packet::path_attributes::ASPathAttribute; +use crate::bgp_packet::path_attributes::NextHopPathAttribute; +use 
crate::bgp_packet::path_attributes::OriginPathAttribute; +use crate::bgp_packet::path_attributes::PathAttribute; +use crate::bgp_packet::path_attributes::{ + LargeCommunitiesPathAttribute, LargeCommunitiesPayload, MPReachNLRIPathAttribute, +}; +use crate::bgp_packet::traits::ParserContext; +use crate::server::config::PrefixAnnouncement; +use crate::server::config::{PeerConfig, ServerConfig}; +use crate::server::data_structures::RouteAnnounce; +use crate::server::data_structures::RouteWithdraw; +use crate::server::data_structures::{RouteInfo, RouteUpdate}; +use crate::server::rib_manager::RouteManagerCommands; +use bytes::BytesMut; +use ip_network_table_deps_treebitmap::address::Address; +use ip_network_table_deps_treebitmap::IpLookupTable; +use std::convert::TryFrom; +use std::convert::TryInto; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::sync::Arc; +use std::sync::RwLock; +use std::time::Duration; +use tokio::io::AsyncReadExt; +use tokio::io::AsyncWriteExt; +use tokio::net::tcp; +use tokio::net::TcpStream; +use tokio::sync::broadcast; +use tokio::sync::mpsc; +use tokio::sync::oneshot; +use tokio::sync::Mutex; +use tokio::task::JoinHandle; +use tokio_util::codec::{Decoder, Encoder}; +use tokio_util::sync::CancellationToken; +use tracing::{info, trace, warn}; + +type PeerInterface = mpsc::UnboundedSender; + +// Note on the threading model: Messages must be processed in order +// from the BGP peer, so we constrain PeerStateMachine to be called +// with updaates on a single thread only. Updating the state should +// not be expensive, and other tasks such as picking the best route +// will be done in a different threading model. + +/// PeerStatus contians the current state of the PSM for monitoring +/// and debugging. +#[derive(Clone, Debug)] +pub struct PeerStatus { + pub name: String, + pub config: PeerConfig, + pub state: BGPState, +} + +/// BGPState represents which state of the BGP state machine the peer +/// is currently in. 
+#[derive(Copy, Clone, Debug, PartialEq)] +pub enum BGPState { + /// Idle represents the configuration existing but not trying to + /// establish connections to or accept connections from the peer. + Idle, + /// Active represents a state where we are trying to establish a + /// connection to the peer. + Active, + /// Connect represents a state where we have intiiated a TCP + /// connection to the peer. + Connect, + /// OpenSent represents a state where we have sent a BGP OPEN + /// message to the peer and are waiting for the corresponding + /// OPEN message back. + OpenSent, + /// OpenConfirm represents a state where we have sent a + /// KEEPALIVE message to the peer after the exachange of OPEN + /// messages, and are waiting for the corresponding KEEPALIVE. + OpenConfirm, + /// Established represents the steady state of an ongoing + /// BGP session where routes are being exchanged. + Established, +} + +// PeerStateMachine has two interfaces, one to the PeerConnector and +// another to the RIBManager. +#[derive(Debug)] +pub enum PeerCommands { + // NewConnection is used to pass a fresh inbound connection + // to this instance. + NewConnection(TcpStream), + // ConnectionClosed indicates that the connection to the peer + // has been lost, and state cleanup should be triggered. + ConnectionClosed(), + + SendNotification(NotificationMessage), + + // Send an UPDATE message to the peer. + Announce(RouteUpdate), + + // Internal events for the PeerStateMachine itself + MessageFromPeer(BGPSubmessage), + + TimerEvent(PeerTimerEvent), + + // Adds a community to all announcements. + AddLargeCommunity((u32, u32), oneshot::Sender), + RemoveLargeCommunity((u32, u32), oneshot::Sender), + + // GetStatus is a crude hack to get a status string out of the PSM for debugging. 
+ GetStatus(oneshot::Sender), +} + +#[derive(Copy, Clone, Debug)] +pub enum PeerTimerEvent { + ConnectTimerExpire(), + HoldTimerExpire(), + KeepaliveTimerExpire(), +} + +async fn run_timer( + cancel_token: CancellationToken, + iface: PeerInterface, + event: PeerTimerEvent, + after: tokio::time::Duration, +) { + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + info!("run_timer was cancelled"); + return; + }, + _ = tokio::time::sleep(after) => { + info!("Sending timer event: {:?}", event); + match iface.send(PeerCommands::TimerEvent(event)) { + Ok(_) => {} + Err(e) => { + warn!("Failed to send timer message to PeerStateMachine: {}, abort run_timer", e); + return; + } + } + } + } + } +} + +// check_hold_timer tries to poll the last_msg_time every second +// to see if the time is past the hold time. +async fn check_hold_timer( + cancel_token: CancellationToken, + iface: PeerInterface, + last_msg_time: Arc>, + hold_time: std::time::Duration, +) { + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + info!("check_hold_timer was cancelled"); + return; + } + _ = tokio::time::sleep(std::time::Duration::from_secs(1)) => { + let last = last_msg_time.read().unwrap(); + let elapsed_time = std::time::SystemTime::now().duration_since(*last); + match elapsed_time { + Ok(duration) => { + if duration > hold_time { + match iface.send(PeerCommands::TimerEvent(PeerTimerEvent::HoldTimerExpire())) { + Ok(()) => {}, + Err(e) => { + warn!("Failed to send HoldTimerExpire message: {}", e); + } + } + // Exit the hold timer task since it's expired already and is not needed anymore. + return; + } + } + Err(e) => { + warn!("Failed to check duration since last message: {}", e); + } + } + } + + } + } +} + +// parse_incoming_msgs reads messages from a TCP socket and dispatches the parsed +// BGP messages to the PeerInterface. 
+async fn parse_incoming_msgs( + cancel_token: CancellationToken, + conn: &mut tcp::OwnedReadHalf, + iface: PeerInterface, + codec: &mut Arc>, +) -> Result<(), std::io::Error> { + let mut buf = BytesMut::new(); + loop { + tokio::select! { + _ = cancel_token.cancelled() => { + info!("check_hold_timer was cancelled"); + return Ok(()); + } + len_res = conn.read_buf(&mut buf) => { + match len_res { + Err(e) => { + warn!("Failed to read from buf: {}", e); + // Send a message that the connection has been closed. + iface + .send(PeerCommands::ConnectionClosed()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; + return Err(e); + } + Ok(len) => { + if len == 0 { + while let Some(frame) = codec.lock().await.decode_eof(&mut buf)? { + iface + .send(PeerCommands::MessageFromPeer(frame.payload)) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; + } + iface + .send(PeerCommands::ConnectionClosed()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; + info!("Exiting handler due to connection close"); + return Ok(()); + } + + while let Some(frame) = codec.lock().await.decode(&mut buf)? { + iface + .send(PeerCommands::MessageFromPeer(frame.payload)) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; + } + } + } + } + } + } +} + +/// generate_open_message creates an open message for the provided peer. +fn generate_open_message(server_config: &ServerConfig, peer_config: &PeerConfig) -> OpenMessage { + let mut res = OpenMessage { + version: 4, + asn: AS_TRANS, + hold_time: server_config.hold_time, + identifier: server_config.identifier, + options: Vec::new(), + }; + + // Four byte ASN. + let asn_cap = FourByteASNCapability { + asn: server_config.asn, + }; + + // Multiprotocol. 
+ let multiprotocol_cap = MultiprotocolCapability { + afi: peer_config.afi, + safi: peer_config.safi, + }; + + res.options.push(OpenOption { + option_type: BGPOpenOptionTypeValues::CAPABILITIES, + oval: OpenOptions::Capabilities(OpenOptionCapabilities { + caps: vec![ + BGPCapability { + cap_type: BGPCapabilityTypeValues::FOUR_BYTE_ASN, + val: BGPCapabilityValue::FourByteASN(asn_cap), + }, + BGPCapability { + cap_type: BGPCapabilityTypeValues::MULTPROTOCOL_BGP4, + val: BGPCapabilityValue::Multiprotocol(multiprotocol_cap), + }, + ], + }), + }); + + res +} + +/// PeerStateMachine encapsulates the state of a particular peer. +/// Type parameter A refers to the type of address this peer is +/// tracking, can be Ipv4Addr or Ipv6Addr as those are the types +/// supported by treebitmap. +pub struct PeerStateMachine { + // server_config is the server wide config that we use here for + // reading global options. + server_config: ServerConfig, + /// The current configuration for this peer. + // To apply a new configuration the peer must be shutdown and + // restarted so that the new configuration can take effect. + config: PeerConfig, + // Store the peer's open message so we can reference it. + peer_open_msg: Option, + + /// Current state of this peer. + pub state: BGPState, + + tcp_stream: Option, + + codec: Arc>, + + /// ADJ-RIB for the peer. + /// The RouteInfo object contians information as to whether the + /// prefix was actually accepted and + /// the whole structure represents ADJ-RIB-IN. + prefixes_in: IpLookupTable>, + + // prefixes_out contains the routes we want to export to the peer. + // TODO: Use this. + //prefixes_out: IpLookupTable, + + // Interface to this state machine + pub iface_rx: mpsc::UnboundedReceiver, + pub iface_tx: mpsc::UnboundedSender, + + // Interfaces to the rest of the daemon. + /// rib_in is a channel to the route processor, all accepted + /// updates from the peer go to rib_in. 
+ route_manager: mpsc::UnboundedSender>, + + // Keep track of the time of the last message to efficiently implement + // the hold timer. + last_msg_time: Arc>, + + // Timers and cancellation token to spawned tasks + connect_timer: Option<(JoinHandle<()>, CancellationToken)>, + hold_timer: Option<(JoinHandle<()>, CancellationToken)>, + keepalive_timer: Option<(JoinHandle<()>, CancellationToken)>, + read_cancel_token: Option, + + shutdown: broadcast::Receiver<()>, +} + +impl PeerStateMachine +where + NLRI: TryInto, + >::Error: ToString, + A: std::fmt::Debug, +{ + pub fn new( + server_config: ServerConfig, + config: PeerConfig, + iface_rx: mpsc::UnboundedReceiver, + iface_tx: mpsc::UnboundedSender, + route_manager: mpsc::UnboundedSender>, + shutdown: broadcast::Receiver<()>, + ) -> PeerStateMachine { + let afi = config.afi; + PeerStateMachine { + server_config, + config, + peer_open_msg: None, + state: BGPState::Active, + tcp_stream: None, + codec: Arc::new(Mutex::new(Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: Some(afi), + }, + })), + prefixes_in: IpLookupTable::new(), + iface_rx, + iface_tx, + route_manager, + last_msg_time: Arc::new(RwLock::new(std::time::SystemTime::UNIX_EPOCH)), + connect_timer: None, + hold_timer: None, + keepalive_timer: None, + read_cancel_token: None, + shutdown, + } + } + + // run implements the main loop of the peer state machine and drives the + // events relating to this particular peer. + pub async fn run(&mut self) { + // TODO: Wire up other spawned tasks into the shutdown signal. + // Initialize connect timer. + { + let token = CancellationToken::new(); + let token_copy = token.clone(); + let chan = self.iface_tx.clone(); + let connect_timer = tokio::spawn(async move { + run_timer( + token_copy, + chan, + PeerTimerEvent::ConnectTimerExpire(), + std::time::Duration::from_secs(5), + ) + .await; + }); + + self.connect_timer = Some((connect_timer, token)); + } + + loop { + let next = tokio::select! 
{ + cmd = self.iface_rx.recv() => cmd, + _ = self.shutdown.recv() => { + warn!("PSM shutting down due to shutdown signal."); + return; + }, + }; + match next { + Some(msg) => match self.handle_chan_msg(msg).await { + Ok(_) => {} + Err(e) => { + warn!( + "Failed to handle message on peer state machine channel: {}", + e + ); + } + }, + None => { + warn!("PeerStateMachine channel broken!"); + return; + } + } + } + } + + async fn handle_chan_msg(&mut self, c: PeerCommands) -> Result<(), std::io::Error> { + match c { + PeerCommands::NewConnection(mut conn) => { + let peer_addr = conn.peer_addr()?; + info!("Handling connection from peer: {}", peer_addr); + // Check that the state machine is in the right state for accepting + // a connection. + if self.state != BGPState::Active && self.state != BGPState::Connect { + info!( + "Dropping connection from peer because PSM is in state: {:?}", + self.state + ); + // Just let conn be dropped here, that closes it. + return Ok(()); + }; + + // Disable connect timer + match &self.connect_timer { + Some((_join_handle, cancel_token)) => { + cancel_token.cancel(); + self.connect_timer = None; + } + None => {} + } + + // Generate the OPEN message and send it to the peer. + let open_msg = generate_open_message(&self.server_config, &self.config); + let bgp_message = BGPMessage { + msg_type: OPEN_MESSAGE, + payload: BGPSubmessage::OpenMessage(open_msg), + }; + let mut buf = BytesMut::new(); + self.codec.lock().await.encode(bgp_message, &mut buf)?; + conn.write(&buf).await?; + + // Update state + self.state = BGPState::OpenSent; + + // Split the TCP connection into onwed read and write halves. + let (mut read_half, write_half) = conn.into_split(); + self.tcp_stream = Some(write_half); + + // Spawn a task to listen + let chan = self.iface_tx.clone(); + let mut codec = self.codec.clone(); + let peer_name = self.config.name.clone(); + + // Spawn a worker task to receive messages from the peer. 
+ // If the connection gets closed, then a ConnectionClosed message is sent + // on chan so handle_chan_msg can clean up the state. + let read_cancel_token = CancellationToken::new(); + self.read_cancel_token = Some(read_cancel_token.clone()); + tokio::spawn(async move { + match parse_incoming_msgs(read_cancel_token, &mut read_half, chan, &mut codec) + .await + { + Ok(_) => info!("reader task shutdown for peer: {}", peer_name), + Err(e) => warn!( + "reader task for peer {} exited with error: {}", + peer_name, e + ), + } + }); + } + + // When the connection is lost, we need to reset the state of the PSM, + // and clear the connection related variables out. Note that we do not + // remove any routes because that should only be done when the hold timer + // expires. + PeerCommands::ConnectionClosed() => { + self.connection_closed().await?; + } + + PeerCommands::SendNotification(notification) => { + self.send_notification(notification).await? + } + + PeerCommands::Announce(_) => { + todo!(); + } + + PeerCommands::AddLargeCommunity(c, sender) => { + for mut a in self.config.announcements.iter_mut() { + if let Some(lcs) = a.large_communities.as_mut() { + lcs.push(format!("{}:{}:{}", self.config.asn, c.0, c.1)); + } else { + a.large_communities = + Some(vec![format!("{}:{}:{}", self.config.asn, c.0, c.1)]); + } + } + for a in &self.config.announcements.clone() { + if let Err(e) = self.announce_static(&a).await { + if let Err(se) = sender.send(e) { + warn!("Failed to send to sender: {}", se); + } + return Ok(()); + } + } + if let Err(se) = sender.send("Ok".to_string()) { + warn!("Failed to send to sender: {}", se); + } + } + PeerCommands::RemoveLargeCommunity(c, sender) => { + let communities_str = format!("{}:{}:{}", self.config.asn, c.0, c.1); + for a in self.config.announcements.iter_mut() { + if let Some(lcs) = a.large_communities.as_mut() { + lcs.retain(|e| *e != communities_str); + } + } + for a in &self.config.announcements.clone() { + if let Err(e) = 
self.announce_static(&a).await { + if let Err(se) = sender.send(e) { + warn!("Failed to send to sender: {}", se); + } + return Ok(()); + } + } + if let Err(se) = sender.send("Ok".to_string()) { + warn!("Failed to send to sender: {}", se); + } + } + + PeerCommands::MessageFromPeer(msg) => match self.handle_msg(msg).await { + Ok(_) => { + // Update the last time counter + // We call unwrap here because it indicates that some other thread which + // was accessing the lock had a panic. + // TODO: This should be handled more gracefully, maybe by shutting down the + // peer and starting it up again. + let mut last_time_lock = (*self.last_msg_time).write().unwrap(); + *last_time_lock = std::time::SystemTime::now(); + } + Err(e) => { + return Err(std::io::Error::new(std::io::ErrorKind::Other, e)); + } + }, + PeerCommands::TimerEvent(timer_event) => match timer_event { + // When the connect timer expires we want to try and initiate + // a new connection to the peer. + PeerTimerEvent::ConnectTimerExpire() => { + info!("Connect timer expired"); + match self.try_connect(Duration::from_secs(3)).await { + Ok(conn) => { + info!("Successfully connected to {}", self.config.ip); + self.iface_tx + .send(PeerCommands::NewConnection(conn)) + .map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::Other, + "Failed to send message on channel", + ) + })?; + // Disable connect timer. 
+ match &self.connect_timer { + Some((_join_handle, cancel_token)) => { + cancel_token.cancel(); + self.connect_timer = None; + } + None => {} + } + } + Err(e) => { + warn!( + "Connection attempt to peer {} failed: {}", + self.config.ip, e + ) + } + } + } + PeerTimerEvent::HoldTimerExpire() => { + trace!("Hold timer expired"); + self.hold_timer_expired().await?; + } + PeerTimerEvent::KeepaliveTimerExpire() => { + trace!("Keepalive timer expired"); + self.send_keepalive().await?; + } + }, + PeerCommands::GetStatus(sender) => { + let state = PeerStatus { + name: self.config.name.clone(), + config: self.config.clone(), + state: self.state, + }; + match sender.send(state) { + Ok(()) => {} + Err(e) => { + warn!( + "PeerCommands::GetStatus: Failed to send state back to requester: {:?}", + e + ) + } + } + } + } + Ok(()) + } + + async fn send_notification( + &mut self, + notification: NotificationMessage, + ) -> Result<(), std::io::Error> { + let mut buf = BytesMut::new(); + let bgp_msg = BGPMessage { + msg_type: BGPMessageTypeValues::NOTIFICATION_MESSAGE, + payload: BGPSubmessage::NotificationMessage(notification), + }; + self.codec.lock().await.encode(bgp_msg, &mut buf)?; + match self.tcp_stream.as_mut() { + Some(stream) => { + stream.write(&buf).await?; + } + None => warn!("Dropped notification message to peer"), + } + Ok(()) + } + + // connection_closed handles the case where the peer connection has been terminated. + // It deallocates the resources in this peer, unsets the TCP connection, removes the + // routes from the inner structure as well as the routes that were propagated into the + // RIB. + async fn connection_closed(&mut self) -> Result<(), std::io::Error> { + info!("Connection closed on peer {}", self.config.name); + + // Cancel keepalive timer. + match &self.keepalive_timer { + Some((_join_handle, cancel_token)) => { + cancel_token.cancel(); + } + None => {} + } + + // Cancel the reading task. 
+ if let Some(cancel_token) = &self.read_cancel_token { + cancel_token.cancel(); + } + + // Close the TCP stream. + if let Some(stream) = self.tcp_stream.as_mut() { + match stream.shutdown().await { + Ok(_) => info!("Closed TCP stream with peer: {}", self.config.name), + Err(e) => warn!( + "Failed to close TCP stream with peer {}: {}", + self.config.name, + e.to_string() + ), + } + } + + // Iterate over every route that we've announced to the route manager + // and withdraw it. + let mut route_withdraw = RouteWithdraw { + peer: self.config.name.clone(), + prefixes: vec![], + }; + + for prefix in self.prefixes_in.iter_mut() { + route_withdraw.prefixes.push(prefix.2.nlri.clone()); + } + + self.route_manager + .send(RouteManagerCommands::Update(RouteUpdate::Withdraw( + route_withdraw, + ))) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::BrokenPipe, e.to_string()))?; + + // Clear prefixes_in. + self.prefixes_in = IpLookupTable::new(); + + // Set the state machine back to the expected. + self.state = BGPState::Active; + + // Restart the connect timer to try and connect periodically. + { + let token = CancellationToken::new(); + let token_copy = token.clone(); + let chan = self.iface_tx.clone(); + let connect_timer = tokio::spawn(async move { + run_timer( + token_copy, + chan, + PeerTimerEvent::ConnectTimerExpire(), + std::time::Duration::from_secs(10), + ) + .await; + }); + + self.connect_timer = Some((connect_timer, token)); + } + + Ok(()) + } + + /// process_withdrawals creates a RouteUpdate from withdrawal announcments and sends + /// them to the rib_in channel to be consumed by the route processor. + fn process_withdrawals(&mut self, withdrawals: Vec) -> Result<(), String> { + let mut route_withdraw = RouteWithdraw { + peer: self.config.name.clone(), + prefixes: vec![], + }; + for nlri in withdrawals { + let addr: A = nlri.clone().try_into().map_err(|e| e.to_string())?; + + // remove from prefixes if present. 
+ self.prefixes_in.remove(addr, nlri.prefixlen.into()); + + route_withdraw.prefixes.push(nlri); + } + + if route_withdraw.prefixes.len() > 0 { + self.route_manager + .send(RouteManagerCommands::Update(RouteUpdate::Withdraw( + route_withdraw, + ))) + .map_err(|e| e.to_string())?; + } + + Ok(()) + } + + /// process_announcements creates a RouteUpdate from the announced NLRIs and path attributes + /// and sends them to the rib_in channel to be consumed by the route processor. + fn process_announcements( + &mut self, + nexthop: Vec, + announcements: Vec, + path_attributes: Vec, + ) -> Result<(), String> { + let mut as_path: Vec = vec![]; + let mut med: u32 = 0; + for attr in &path_attributes { + match attr { + PathAttribute::ASPathAttribute(aspa) => { + for segment in &aspa.segments { + for asn in &segment.path { + as_path.push(*asn); + } + } + } + PathAttribute::MultiExitDiscPathAttribute(med_attr) => { + med = med_attr.0; + } + _ => {} + } + } + + let mut route_update = RouteAnnounce { + local_pref: self.config.local_pref, + med, + nexthop, + as_path, + path_attributes, + peer: self.config.name.clone(), + prefixes: vec![], + }; + + for announcement in announcements { + let addr: A = announcement.clone().try_into().map_err(|e| e.to_string())?; + // Should we accept this prefix? + let accepted: bool = self.decide_accept_prefix(addr, announcement.prefixlen); + let rejection_reason: Option = match accepted { + true => Some("Filtered by policy".to_owned()), + false => None, + }; + + // Note that this logic assumes accepted routes remain accepted and the converse. + // If this is to support live updates of filters the assumptions will need to be + // revisited. + match self + .prefixes_in + .exact_match(addr, announcement.prefixlen.into()) + { + Some(route_info) => { + // Update the route_info, we need to clone it then reassign. 
+ let mut new_route_info: RouteInfo = route_info.clone(); + new_route_info.path_attributes = route_update.path_attributes.clone(); + new_route_info.updated = std::time::SystemTime::now(); + self.prefixes_in + .insert(addr, announcement.prefixlen.into(), new_route_info); + } + None => { + // Insert new RouteInfo + let route_info = RouteInfo:: { + prefix: addr, + prefixlen: announcement.prefixlen, + nlri: announcement.clone(), + accepted, + rejection_reason, + learned: std::time::SystemTime::now(), + updated: std::time::SystemTime::now(), + path_attributes: route_update.path_attributes.clone(), + }; + self.prefixes_in + .insert(addr, announcement.prefixlen.into(), route_info); + } + } + + if accepted { + route_update.prefixes.push(announcement); + } + } + + if !route_update.prefixes.is_empty() { + self.route_manager + .send(RouteManagerCommands::Update(RouteUpdate::Announce( + route_update, + ))) + .map_err(|e| e.to_string())?; + } + + Ok(()) + } + + fn decide_accept_prefix(&mut self, _: A, _: u8) -> bool { + // TODO: Implement filtering of prefixes. + true + } + + fn decide_accept_message(&mut self, _: &[PathAttribute]) -> bool { + // TODO: Implement filtering of Update messages. + + // TODO: Section 9.1.2 of RFC 4271: + // * Reject the message if the next hop is not resolvable + // * Reject the message if there is an AS loop + true + } + + /// try_connect attempts to connect to a remote TCP endpoint with a given timeout. + async fn try_connect(&mut self, timeout: Duration) -> Result { + let addr = self.config.ip; + let port = self.config.port.unwrap_or(179); + let sockaddr = SocketAddr::new(addr, port); + + let std_stream = std::net::TcpStream::connect_timeout(&sockaddr, timeout)?; + std_stream.set_nonblocking(true)?; + Ok(TcpStream::from_std(std_stream)?) + } + + /// send_keepalive checks if the peer connection is still established and sends a + /// keepalive message. + /// Takes a lock on the peer object. 
+    async fn send_keepalive(&mut self) -> Result<(), std::io::Error> {
+        info!("Sending keepalive");
+        match self.tcp_stream.as_mut() {
+            Some(conn) => {
+                let keepalive = BGPMessage {
+                    msg_type: BGPMessageTypeValues::KEEPALIVE_MESSAGE,
+                    payload: BGPSubmessage::KeepaliveMessage(KeepaliveMessage {}),
+                };
+                let mut buf = BytesMut::new();
+                self.codec.lock().await.encode(keepalive, &mut buf)?;
+                // write_all rather than write: a single write() is allowed to
+                // send only part of the buffer, which would truncate the message.
+                conn.write_all(buf.as_ref()).await?;
+                Ok(())
+            }
+            None => Err(std::io::Error::new(
+                std::io::ErrorKind::Other,
+                "Called send_keepalive with no connection set",
+            )),
+        }
+    }
+
+    /// handle_msg processes incoming messages and updates the state in PeerStateMachine.
+    ///
+    /// Dispatches on the current `self.state`; each handler returns `Err` with a
+    /// human readable description when the message is not acceptable in that state.
+    async fn handle_msg(&mut self, msg: BGPSubmessage) -> Result<(), String> {
+        match &self.state {
+            BGPState::Idle => self.handle_idle_msg().await,
+            BGPState::Active => self.handle_active_msg(msg).await,
+            BGPState::Connect => self.handle_connect_msg(msg).await,
+            BGPState::OpenSent => self.handle_opensent_msg(msg).await,
+            BGPState::OpenConfirm => self.handle_openconfirm_msg(msg).await,
+            BGPState::Established => self.handle_established_msg(msg).await,
+        }
+    }
+
+    /// No message is acceptable in the Idle state; always returns `Err`.
+    async fn handle_idle_msg(&mut self) -> Result<(), String> {
+        Err("Peer cannot process messages when in the Idle state".to_string())
+    }
+
+    /// Messages arriving in the Active state are discarded with an `Err`.
+    async fn handle_active_msg(&mut self, msg: BGPSubmessage) -> Result<(), String> {
+        // In the active state a new connection should come in via the NewConnection
+        // message on the PSM channel, or if we establish a connection out, then that
+        // logic should handle the messages until OpenSent.
+        Err(format!(
+            "Discarding message received in ACTIVE state: {:?}",
+            msg
+        ))
+    }
+
+    /// Messages arriving in the Connect state are discarded with an `Err`.
+    async fn handle_connect_msg(&mut self, msg: BGPSubmessage) -> Result<(), String> {
+        // In the connect state a new connection should come in via the NewConnection
+        // message on the PSM channel, or if we establish a connection out, then that
+        // logic should handle the messages until OpenSent.
+        Err(format!(
+            "Discarding message received in CONNECT state: {:?}",
+            msg
+        ))
+    }
+
+    /// In the OpenSent state we still need to get the OPEN message from the peer.
+    ///
+    /// Validates the peer's OPEN (ASN, four-byte-ASN capability, multiprotocol
+    /// capability, hold time). Every failed check stops processing immediately;
+    /// only a fully valid OPEN is stored, answered with a KEEPALIVE and moves the
+    /// state to OpenConfirm.
+    async fn handle_opensent_msg(&mut self, msg: BGPSubmessage) -> Result<(), String> {
+        info!("Handling message in OpenSent state: {:?}", msg);
+        match msg {
+            BGPSubmessage::OpenMessage(o) => {
+                // Queues a NOTIFICATION followed by a connection close on the
+                // peer's own command channel.
+                fn notify_error_close(
+                    error_code: u8,
+                    error_subcode: u8,
+                    iface_tx: &mut mpsc::UnboundedSender<PeerCommands>,
+                ) -> Result<(), String> {
+                    let notification = NotificationMessage {
+                        error_code,
+                        error_subcode,
+                        data: vec![],
+                    };
+                    iface_tx
+                        .send(PeerCommands::SendNotification(notification))
+                        .map_err(|e| e.to_string())?;
+                    iface_tx
+                        .send(PeerCommands::ConnectionClosed())
+                        .map_err(|e| e.to_string())?;
+                    Ok(())
+                }
+
+                // Check that the peer has the right ASN set
+                if u32::from(o.asn) != self.config.asn && o.asn != AS_TRANS {
+                    warn!(
+                        "peer {} did not use AS_TRANS or actual ASN: {}, closing conn",
+                        self.config.name, o.asn
+                    );
+                    self.state = BGPState::Active;
+                    if let Some(stream) = self.tcp_stream.as_mut() {
+                        stream.shutdown().await.map_err(|e| e.to_string())?;
+                    }
+                    // Stop here: previously validation carried on and ended by
+                    // trying to send a KEEPALIVE on the stream that was just shut down.
+                    return Err(format!(
+                        "peer {} sent unexpected ASN {} in OPEN message",
+                        self.config.name, o.asn
+                    ));
+                }
+
+                // Unpack ASN option and assert correctness.
+                let mut as4_cap = None;
+                for option in &o.options {
+                    match &option.oval {
+                        OpenOptions::Capabilities(caps) => {
+                            for cap in &caps.caps {
+                                if let BGPCapabilityValue::FourByteASN(v) = &cap.val {
+                                    as4_cap = Some(v.clone());
+                                }
+                            }
+                        }
+                    }
+                }
+
+                match as4_cap {
+                    Some(cap) => {
+                        // We have to set the AS4 option on the BGP message parser.
+                        self.codec.lock().await.ctx.four_octet_asn = Some(true);
+                        if cap.asn != self.config.asn {
+                            warn!(
+                                "Got non-matching ASN from peer: {} want: {}",
+                                cap.asn, self.config.asn
+                            );
+                            // Return like the multiprotocol checks below do, so a
+                            // rejected session cannot still reach OpenConfirm.
+                            return notify_error_close(2, 2, &mut self.iface_tx);
+                        }
+                    }
+                    None => {
+                        // Reject connection by sending notification then queue a close.
+                        return notify_error_close(2, 4, &mut self.iface_tx);
+                    }
+                }
+
+                // Assert that the right MultiProtocol options are set
+                // TODO: Handle the case where there is more than one multiprotocol cap set.
+                let mut mp_cap = None;
+                for option in &o.options {
+                    match &option.oval {
+                        OpenOptions::Capabilities(caps) => {
+                            for cap in &caps.caps {
+                                if let BGPCapabilityValue::Multiprotocol(mp) = &cap.val {
+                                    mp_cap = Some(mp.clone());
+                                }
+                            }
+                        }
+                    }
+                }
+
+                match mp_cap {
+                    Some(cap) => {
+                        if cap.afi != self.config.afi {
+                            warn!(
+                                "Mismatched multiprotocol AFI, got: {}, want: {}",
+                                cap.afi, self.config.afi
+                            );
+                            return notify_error_close(2, 4, &mut self.iface_tx);
+                        }
+                        if cap.safi != self.config.safi {
+                            warn!(
+                                "Mismatched multiprotocol SAFI, got: {}, want: {}",
+                                cap.safi, self.config.safi
+                            );
+                            return notify_error_close(2, 4, &mut self.iface_tx);
+                        }
+                    }
+                    None => {
+                        warn!("No multiptotocol capability found, closing conn");
+                        return notify_error_close(2, 4, &mut self.iface_tx);
+                    }
+                }
+
+                // Ensure that the hold time is set to an acceptable value according to
+                // https://datatracker.ietf.org/doc/html/rfc4271#section-6.2
+                if matches!(o.hold_time, 1 | 2) {
+                    return notify_error_close(2, 6, &mut self.iface_tx);
+                }
+
+                // Store the open message for reference / debugging.
+                self.peer_open_msg = Some(o);
+
+                // Send the Keepalive message and transition to OpenConfirm.
+                self.send_keepalive().await.map_err(|e| e.to_string())?;
+                self.state = BGPState::OpenConfirm;
+
+                Ok(())
+            }
+            _ => Err("Got non-open message in state opensent".to_string()),
+        }
+    }
+
+    /// In the OpenConfirm state we are waiting for a KEEPALIVE from the peer.
+    ///
+    /// On KEEPALIVE the session becomes Established, the keepalive and hold
+    /// timers are started (unless the peer's hold time is 0, which disables
+    /// both) and the statically configured announcements are sent.
+    async fn handle_openconfirm_msg(&mut self, msg: BGPSubmessage) -> Result<(), String> {
+        let hold_time = self
+            .peer_open_msg
+            .as_ref()
+            .map(|o| o.hold_time)
+            .ok_or_else(|| {
+                "Logic error: reached handle_openconfirm without a open message set".to_string()
+            })?;
+        match msg {
+            BGPSubmessage::KeepaliveMessage(_) => {
+                // Switch the state from OpenConfirm to ESTABLISHED.
+                self.state = BGPState::Established;
+
+                if hold_time > 0 {
+                    // Set keepalive timer: one third of the peer's hold time.
+                    let keepalive_duration = hold_time / 3;
+                    info!(
+                        "Using keepalive duration of {} for peer {}",
+                        keepalive_duration, self.config.name
+                    );
+                    {
+                        let token = CancellationToken::new();
+                        let token_copy = token.clone();
+                        let chan = self.iface_tx.clone();
+                        let keepalive_timer = tokio::spawn(async move {
+                            run_timer(
+                                token_copy,
+                                chan,
+                                PeerTimerEvent::KeepaliveTimerExpire(),
+                                std::time::Duration::from_secs(keepalive_duration.into()),
+                            )
+                            .await;
+                        });
+
+                        self.keepalive_timer = Some((keepalive_timer, token));
+                    }
+
+                    // Set hold timer.
+                    {
+                        let token = CancellationToken::new();
+                        let token_copy = token.clone();
+                        let chan = self.iface_tx.clone();
+                        let last_msg_time = self.last_msg_time.clone();
+                        let hold_timer = tokio::spawn(async move {
+                            check_hold_timer(
+                                token_copy,
+                                chan,
+                                last_msg_time,
+                                std::time::Duration::from_secs(hold_time.into()),
+                            )
+                            .await
+                        });
+
+                        self.hold_timer = Some((hold_timer, token));
+                    }
+                };
+
+                // TODO: Should not have to clone here?
+                // The clone releases the borrow of self.config while
+                // announce_static borrows self mutably.
+                let announcements = self.config.announcements.clone();
+                for announcement in announcements {
+                    self.announce_static(&announcement).await?;
+                }
+
+                Ok(())
+            }
+            _ => Err(format!(
+                "Got unsupported message type in handle_openconfirm_msg: {:?}",
+                msg
+            )),
+        }
+    }
+
+    /// Sends a NOTIFICATION with error code 4 (Hold Timer Expired in RFC 4271)
+    /// and then closes the connection.
+    async fn hold_timer_expired(&mut self) -> Result<(), std::io::Error> {
+        let notification = NotificationMessage {
+            error_code: 4,
+            error_subcode: 0,
+            data: vec![],
+        };
+
+        self.send_notification(notification).await?;
+        self.connection_closed().await?;
+
+        Ok(())
+    }
+
+    /// Builds an UPDATE for one statically configured prefix and writes it to
+    /// the peer.
+    ///
+    /// IPv4 prefixes are carried in the classic NLRI field with a NEXT_HOP
+    /// attribute; IPv6 prefixes are carried in an MP_REACH_NLRI attribute.
+    /// Returns `Err` when the nexthop family does not match the session AFI or
+    /// when encoding / writing fails. Malformed large communities are logged
+    /// and skipped. When no TCP stream is set nothing is written and `Ok(())`
+    /// is returned.
+    async fn announce_static(&mut self, announcement: &PrefixAnnouncement) -> Result<(), String> {
+        let mut bgp_update_msg = UpdateMessage {
+            withdrawn_nlri: vec![],
+            announced_nlri: vec![],
+            path_attributes: vec![],
+        };
+
+        // Origin, TODO: configure this based on i/eBGP
+        bgp_update_msg
+            .path_attributes
+            .push(PathAttribute::OriginPathAttribute(OriginPathAttribute(1)));
+
+        bgp_update_msg
+            .path_attributes
+            .push(ASPathAttribute::from_asns(vec![self.server_config.asn]));
+
+        match self.config.afi {
+            AddressFamilyIdentifier::Ipv4 => {
+                match announcement.nexthop {
+                    IpAddr::V4(nh) => {
+                        bgp_update_msg
+                            .path_attributes
+                            .push(PathAttribute::NextHopPathAttribute(NextHopPathAttribute(
+                                nh,
+                            )))
+                    }
+                    _ => return Err("Found non IPv4 nexthop in announcement".to_string()),
+                }
+
+                let nlri = NLRI::try_from(announcement.prefix.clone())?;
+                bgp_update_msg.announced_nlri.push(nlri);
+            }
+            AddressFamilyIdentifier::Ipv6 => {
+                let nexthop_octets = match announcement.nexthop {
+                    IpAddr::V6(nh) => nh.octets().to_vec(),
+                    _ => {
+                        return Err("Found non IPv6 nexthop in announcement".to_string());
+                    }
+                };
+                let nlri = NLRI::try_from(announcement.prefix.clone())?;
+                let mp_reach = MPReachNLRIPathAttribute {
+                    afi: AddressFamilyIdentifier::Ipv6,
+                    safi: SubsequentAddressFamilyIdentifier::Unicast,
+                    nexthop: nexthop_octets,
+                    nlris: vec![nlri],
+                };
+                bgp_update_msg
+                    .path_attributes
+                    .push(PathAttribute::MPReachNLRIPathAttribute(mp_reach));
+            }
+        }
+
+        if let Some(large_communities) = &announcement.large_communities {
+            let mut large_communities_attr = LargeCommunitiesPathAttribute { values: vec![] };
+            for large_community in large_communities {
+                // NOTE(review): u32 assumed for the three components (RFC 8092
+                // defines them as 4-octet values); confirm against the field
+                // types of LargeCommunitiesPayload.
+                // Any component that fails to parse invalidates the whole
+                // community instead of being silently dropped.
+                let parts: Vec<u32> = large_community
+                    .split(':')
+                    .map(|x| x.parse::<u32>())
+                    .collect::<Result<_, _>>()
+                    .unwrap_or_default();
+                if parts.len() != 3 {
+                    warn!("Failed to parse large community: {}", large_community);
+                    // Skip it: indexing parts[0..2] below would panic.
+                    continue;
+                }
+                let payload = LargeCommunitiesPayload {
+                    global_admin: parts[0],
+                    ld1: parts[1],
+                    ld2: parts[2],
+                };
+                large_communities_attr.values.push(payload);
+            }
+            // Do not emit an attribute that carries no communities at all.
+            if !large_communities_attr.values.is_empty() {
+                bgp_update_msg
+                    .path_attributes
+                    .push(PathAttribute::LargeCommunitiesPathAttribute(
+                        large_communities_attr,
+                    ));
+            }
+        }
+
+        let bgp_message = BGPMessage {
+            msg_type: UPDATE_MESSAGE,
+            payload: BGPSubmessage::UpdateMessage(bgp_update_msg),
+        };
+
+        info!("Sending static announcement to peer: {:?}", bgp_message);
+
+        let mut buf = BytesMut::new();
+        self.codec
+            .lock()
+            .await
+            .encode(bgp_message, &mut buf)
+            .map_err(|e| format!("failed to encode BGP message: {}", e))?;
+
+        if let Some(stream) = self.tcp_stream.as_mut() {
+            // write_all: see send_keepalive, a short write would corrupt the
+            // BGP byte stream.
+            stream
+                .write_all(&buf)
+                .await
+                .map_err(|e| format!("Failed to write msg to peer: {}", e))?;
+        }
+        Ok(())
+    }
+
+    /// In the established state we accept Update, Keepalive and Notification messages.
+    ///
+    /// UPDATE messages rejected by `decide_accept_message` are logged and
+    /// dropped. Accepted ones are processed in this order: multiprotocol
+    /// reach/unreach attributes, classic withdrawals, classic announcements.
+    async fn handle_established_msg(&mut self, msg: BGPSubmessage) -> Result<(), String> {
+        match msg {
+            BGPSubmessage::UpdateMessage(u) => {
+                if !self.decide_accept_message(&u.path_attributes) {
+                    info!(
+                        "Rejected message due to path attributes: {:?}",
+                        u.path_attributes
+                    );
+                    // A rejected message must not reach the RIB.
+                    return Ok(());
+                }
+
+                // Have a separate path for calling Multiprotocol NLRI processing.
+                for attr in &u.path_attributes {
+                    match attr {
+                        PathAttribute::MPReachNLRIPathAttribute(nlri) => {
+                            let nexthop_res = nlri.clone().nexthop_to_v6();
+                            // TODO: How do we pick whether to use the global or LLNH?
+                            match nexthop_res {
+                                Some((global, _llnh_opt)) => {
+                                    self.process_announcements(
+                                        global.octets().to_vec(),
+                                        nlri.nlris.clone(),
+                                        u.path_attributes.clone(),
+                                    )?;
+                                }
+                                None => {
+                                    warn!(
+                                        "Got MP_REACH_NLRI from peer {} without a usable IPv6 nexthop",
+                                        self.config.name
+                                    );
+                                }
+                            }
+                        }
+                        PathAttribute::MPUnreachNLRIPathAttribute(nlri) => {
+                            self.process_withdrawals(nlri.nlris.clone())?;
+                        }
+                        _ => {}
+                    }
+                }
+
+                if !u.withdrawn_nlri.is_empty() {
+                    self.process_withdrawals(u.withdrawn_nlri)?;
+                }
+                if !u.announced_nlri.is_empty() {
+                    // The last NEXT_HOP attribute in the message wins.
+                    let mut nexthop_option = None;
+                    for attr in &u.path_attributes {
+                        if let PathAttribute::NextHopPathAttribute(nh_attr) = attr {
+                            nexthop_option = Some(nh_attr.clone());
+                        }
+                    }
+                    match nexthop_option {
+                        Some(nexthop) => {
+                            self.process_announcements(
+                                nexthop.0.octets().to_vec(),
+                                u.announced_nlri,
+                                u.path_attributes,
+                            )?;
+                        }
+                        None => {
+                            warn!(
+                                "Got announced NLRI from peer {} without any nexthop",
+                                self.config.name
+                            );
+                            // TODO: Send a notification to the peer in this case.
+                        }
+                    }
+                }
+
+                Ok(())
+            }
+            BGPSubmessage::NotificationMessage(n) => {
+                info!(
+                    "Got notification message from peer {}: {}",
+                    self.config.name, n
+                );
+                Ok(())
+            }
+            BGPSubmessage::KeepaliveMessage(_) => Ok(()),
+            _ => Err(format!("Got unexpected message from peer: {:?}", msg)),
+        }
+    }
+}
diff --git a/bgpd/src/server/rib_manager.rs b/bgpd/src/server/rib_manager.rs
new file mode 100644
index 0000000..9eac5c1
--- /dev/null
+++ b/bgpd/src/server/rib_manager.rs
@@ -0,0 +1,382 @@
+// Copyright 2021 Rayhaan Jaufeerally.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::bgp_packet::nlri::NLRI;
+use crate::server::data_structures::RouteAnnounce;
+use std::collections::BTreeMap;
+
+use crate::bgp_packet::path_attributes::PathAttribute;
+use crate::server::config::PeerConfig;
+use crate::server::data_structures::RouteUpdate;
+use crate::server::peer::PeerCommands;
+
+use tracing::{info, trace, warn};
+
+use std::cmp::Eq;
+use std::collections::HashMap;
+use std::convert::TryInto;
+
+use ip_network_table_deps_treebitmap::address::Address;
+use serde::Serialize;
+use std::sync::Mutex;
+use tokio::sync::broadcast;
+use tokio::sync::mpsc;
+use tokio::sync::oneshot;
+
+use super::data_structures::RouteWithdraw;
+
+type PeerInterface = mpsc::UnboundedSender<PeerCommands>;
+
+/// Capacity of the broadcast channel used to stream PathSet updates.
+// TODO: Make this a flag that can be configured.
+const PATHSET_STREAM_CAPACITY: usize = 10_000_000;
+
+/// Path is a structure to contain a specific route via one nexthop.
+/// Note that currently there is an assumption that there is only
+/// one route per peer per prefix, but when ADD-PATH support is added
+/// this will no longer hold true.
+#[derive(Debug, Clone, Serialize)]
+pub struct Path {
+    pub nexthop: Vec<u8>,
+    pub peer_name: String,
+    pub local_pref: u32,
+    pub med: u32,
+    pub as_path: Vec<u32>,
+    pub path_attributes: Vec<PathAttribute>,
+}
+
+impl Path {
+    /// Returns true when `self` is preferred over `other`.
+    ///
+    /// Criteria are applied in order, a later one only breaking ties of the
+    /// earlier ones: higher local preference, shorter AS path, lower MED and
+    /// finally the lexicographically smaller peer name.
+    // TODO: Origin
+    pub fn is_better_than(&self, other: &Path) -> bool {
+        other
+            .local_pref
+            .cmp(&self.local_pref)
+            .then_with(|| self.as_path.len().cmp(&other.as_path.len()))
+            .then_with(|| self.med.cmp(&other.med))
+            .then_with(|| self.peer_name.cmp(&other.peer_name))
+            == std::cmp::Ordering::Less
+    }
+}
+
+impl PartialEq for Path {
+    /// Structural equality on the routing-relevant fields.
+    ///
+    /// The previous implementation answered "is self better than other", which
+    /// made `a == a` false and broke the `Eq` contract; that preference logic
+    /// now lives in [`Path::is_better_than`].
+    fn eq(&self, other: &Path) -> bool {
+        // NOTE(review): path_attributes is left out because it is not visible
+        // here whether PathAttribute implements PartialEq.
+        self.peer_name == other.peer_name
+            && self.local_pref == other.local_pref
+            && self.med == other.med
+            && self.as_path == other.as_path
+            && self.nexthop == other.nexthop
+    }
+}
+
+impl Eq for Path {}
+
+/// All known paths towards one prefix.
+#[derive(Debug, Clone, Serialize)]
+pub struct PathSet<A> {
+    pub addr: A,
+    pub prefixlen: u8,
+    pub nlri: NLRI,
+    // paths is keyed by peer name, so iteration order is by peer name and
+    // says nothing about which path is best.
+    pub paths: BTreeMap<String, Path>,
+}
+
+/// RibSnapshot contains a version number and the dump of all the routes.
+#[derive(Debug, Serialize)]
+pub struct RibSnapshot<A> {
+    pub epoch: u64,
+    pub routes: Vec<PathSet<A>>,
+}
+
+pub enum RouteManagerCommands<A> {
+    Update(RouteUpdate),
+    /// DumpRib returns the view of the RIB at the current epoch.
+    DumpRib(oneshot::Sender<RibSnapshot<A>>),
+    /// StreamRib will send all the routes currently in the RIB then stream updates.
+    StreamRib(
+        mpsc::UnboundedSender<(u64, PathSet<A>)>,
+        oneshot::Sender<broadcast::Receiver<(u64, PathSet<A>)>>,
+    ),
+}
+
+pub struct RibManager<A: Address> {
+    mgr_rx: mpsc::UnboundedReceiver<RouteManagerCommands<A>>,
+    // NOTE(review): the value type of this map was lost in extraction;
+    // (PeerConfig, PeerInterface) is inferred from the imports. Confirm.
+    peers: HashMap<String, (PeerConfig, PeerInterface)>,
+
+    // We need to use a mutex for PathSet because IpLookupTable does not return a mut ptr.
+    rib: ip_network_table_deps_treebitmap::IpLookupTable<A, Mutex<PathSet<A>>>,
+    epoch: u64,
+
+    // Handle for streaming updates to PathSets in the RIB.
+    pathset_streaming_handle: broadcast::Sender<(u64, PathSet<A>)>,
+
+    shutdown: broadcast::Receiver<()>,
+}
+
+impl<A: Address> RibManager<A>
+where
+    NLRI: TryInto<A>,
+    <NLRI as TryInto<A>>::Error: ToString,
+    A: std::fmt::Debug + std::fmt::Display,
+{
+    /// Creates an empty RIB manager that reads commands from `chan` and stops
+    /// when `shutdown` fires.
+    // NOTE(review): the error type was lost in extraction; std::io::Error is
+    // assumed. This constructor never returns Err.
+    pub fn new(
+        chan: mpsc::UnboundedReceiver<RouteManagerCommands<A>>,
+        shutdown: broadcast::Receiver<()>,
+    ) -> Result<Self, std::io::Error> {
+        let (pathset_tx, _) = broadcast::channel(PATHSET_STREAM_CAPACITY);
+        Ok(RibManager::<A> {
+            mgr_rx: chan,
+            peers: HashMap::new(),
+            rib: ip_network_table_deps_treebitmap::IpLookupTable::new(),
+            epoch: 0,
+            pathset_streaming_handle: pathset_tx,
+            shutdown,
+        })
+    }
+
+    /// Processes commands until shutdown is signalled (`Ok`) or every sender
+    /// of the command channel has been dropped (`Err`).
+    pub async fn run(&mut self) -> Result<(), String> {
+        loop {
+            let next = tokio::select! {
+                cmd = self.mgr_rx.recv() => cmd,
+                _ = self.shutdown.recv() => {
+                    warn!("RIB manager shutting down due to shutdown signal.");
+                    return Ok(());
+                }
+            };
+            match next {
+                Some(mgr_cmd) => match mgr_cmd {
+                    RouteManagerCommands::Update(update) => {
+                        // One bad update must not take the whole RIB down.
+                        if let Err(e) = self.handle_update(update) {
+                            warn!("Failed to apply route update: {}", e);
+                        }
+                    }
+                    RouteManagerCommands::DumpRib(sender) => {
+                        self.dump_rib(sender);
+                    }
+                    RouteManagerCommands::StreamRib(dump_sender, stream_sender) => {
+                        self.stream_rib(dump_sender, stream_sender);
+                    }
+                },
+                None => {
+                    warn!("All senders of the manager channel have been dropped, manager exiting!");
+                    return Err("Manager exited due to channel closure".to_string());
+                }
+            }
+        }
+    }
+
+    // dump_rib returns an atomic snapshot of the RIB at the current epoch.
+    fn dump_rib(&mut self, sender: tokio::sync::oneshot::Sender<RibSnapshot<A>>) {
+        info!("Starting RIB dump");
+        let snapshot = RibSnapshot::<A> {
+            epoch: self.epoch,
+            routes: self
+                .rib
+                .iter()
+                .map(|(_, _, pathset)| pathset.lock().unwrap().clone())
+                .collect(),
+        };
+        // The requester may have gone away in the meantime; just log it.
+        if let Err(e) = sender.send(snapshot) {
+            warn!("Failed to send snapshot of RIB: {:?}", e);
+        }
+        info!("Done RIB dump");
+    }
+
+    /// stream_rib sends the current routes in the RIB back via dump_sender then closes it,
+    /// and subsequently hands a broadcast::Receiver for streaming updates to stream_sender.
+    fn stream_rib(
+        &mut self,
+        dump_sender: mpsc::UnboundedSender<(u64, PathSet<A>)>,
+        stream_sender: oneshot::Sender<broadcast::Receiver<(u64, PathSet<A>)>>,
+    ) {
+        // Send all the routes currently in the RIB.
+        for pathset in self.rib.iter() {
+            if let Err(e) = dump_sender.send((self.epoch, pathset.2.lock().unwrap().clone())) {
+                warn!("Failed to send dump to client: {}", e);
+            }
+        }
+        // Dropping the sender closes the dump channel, which tells the
+        // consumer that the dump is complete.
+        drop(dump_sender);
+        // Create a new subscriber and return that to the caller to be notified of updates.
+        let subscriber = self.pathset_streaming_handle.subscribe();
+        if let Err(e) = stream_sender.send(subscriber) {
+            warn!("Failed to send subscriber in stream_rib: {:?}", e);
+        }
+    }
+
+    fn handle_update(&mut self, update: RouteUpdate) -> Result<(), String> {
+        match update {
+            RouteUpdate::Announce(announce) => self.handle_announce(announce),
+            RouteUpdate::Withdraw(withdraw) => self.handle_withdraw(withdraw),
+        }
+    }
+
+    /// Inserts or replaces the announcing peer's path for every prefix in
+    /// `update` and broadcasts the resulting PathSet.
+    ///
+    /// A re-announcement replaces the whole stored path, so as_path, med and
+    /// local_pref are refreshed together with the nexthop and attributes.
+    fn handle_announce(&mut self, update: RouteAnnounce) -> Result<(), String> {
+        for nlri in update.prefixes {
+            // Increment the epoch on every NLRI processed.
+            self.epoch += 1;
+            let addr: A = nlri.clone().try_into().map_err(|e| e.to_string())?;
+            let prefixlen = nlri.prefixlen;
+            let path = Path {
+                nexthop: update.nexthop.clone(),
+                peer_name: update.peer.clone(),
+                local_pref: update.local_pref,
+                med: update.med,
+                as_path: update.as_path.clone(),
+                path_attributes: update.path_attributes.clone(),
+            };
+
+            let snapshot = match self.rib.exact_match(addr, prefixlen.into()) {
+                Some(path_set_wrapped) => {
+                    // There is already this prefix in the RIB; insert() tells us
+                    // whether this is a reannouncement or a fresh announcement.
+                    let mut path_set = path_set_wrapped.lock().unwrap();
+                    if path_set.paths.insert(update.peer.clone(), path).is_some() {
+                        trace!(
+                            "Updating existing path attributes for NLRI: {}/{}",
+                            addr,
+                            prefixlen
+                        );
+                    }
+                    path_set.clone()
+                }
+                None => {
+                    // This prefix has never been seen before, so add a new PathSet for it.
+                    let mut path_set = PathSet::<A> {
+                        addr,
+                        prefixlen,
+                        nlri,
+                        paths: BTreeMap::new(),
+                    };
+                    path_set.paths.insert(update.peer.clone(), path);
+                    self.rib
+                        .insert(addr, prefixlen.into(), Mutex::new(path_set.clone()));
+                    path_set
+                }
+            };
+
+            // Ignore errors sending due to no active receivers on the channel.
+            let _ = self.pathset_streaming_handle.send((self.epoch, snapshot));
+        }
+
+        Ok(())
+    }
+
+    /// Removes the withdrawing peer's path for every prefix in `update`,
+    /// broadcasts the resulting PathSet and drops PathSets that became empty.
+    fn handle_withdraw(&mut self, update: RouteWithdraw) -> Result<(), String> {
+        for nlri in update.prefixes {
+            self.epoch += 1;
+            let addr: A = nlri.clone().try_into().map_err(|e| e.to_string())?;
+            let mut pathset_empty = false;
+            if let Some(path_set_wrapped) = self.rib.exact_match(addr, nlri.prefixlen.into()) {
+                let mut path_set = path_set_wrapped.lock().unwrap();
+                let removed = path_set.paths.remove(&update.peer);
+                if removed.is_none() {
+                    warn!(
+                        "Got a withdrawal for route {} from {}, which was not in RIB",
+                        nlri, update.peer
+                    );
+                }
+                // Ignore errors sending due to no active receivers on the channel.
+                let _ = self
+                    .pathset_streaming_handle
+                    .send((self.epoch, path_set.clone()));
+                pathset_empty = path_set.paths.is_empty();
+            } else {
+                warn!(
+                    "Got a withdrawal for route {} from {}, which was not in RIB",
+                    nlri, update.peer
+                );
+            }
+            // Removal happens after the lookup borrow above has ended.
+            if pathset_empty {
+                self.rib.remove(addr, nlri.prefixlen.into());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns a copy of the PathSet stored for exactly `addr`/`prefixlen`.
+    pub fn lookup_path_exact(&self, addr: A, prefixlen: u32) -> Option<PathSet<A>> {
+        self.rib
+            .exact_match(addr, prefixlen)
+            .map(|path| path.lock().unwrap().clone())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::bgp_packet::constants::AddressFamilyIdentifier;
+    use crate::bgp_packet::nlri::NLRI;
+    use crate::server::rib_manager::Path;
+    use crate::server::rib_manager::RibManager;
+    use crate::server::rib_manager::RouteAnnounce;
+    use crate::server::rib_manager::RouteManagerCommands;
+    use crate::server::rib_manager::RouteUpdate;
+
+    use std::net::Ipv6Addr;
+    use std::str::FromStr;
+    use tokio::sync::mpsc;
+
+    fn test_nexthop() -> Ipv6Addr {
+        Ipv6Addr::new(0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0x1)
+    }
+
+    fn test_announce(as_path: Vec<u32>) -> RouteAnnounce {
+        RouteAnnounce {
+            peer: "Some peer".to_string(),
+            prefixes: vec![NLRI {
+                afi: AddressFamilyIdentifier::Ipv6,
+                prefixlen: 32,
+                prefix: vec![0x20, 0x01, 0xd, 0xb8],
+            }],
+            as_path,
+            local_pref: 0,
+            med: 0,
+            nexthop: test_nexthop().octets().to_vec(),
+            path_attributes: vec![],
+        }
+    }
+
+    fn test_manager() -> RibManager<Ipv6Addr> {
+        let (_, rp_rx) = mpsc::unbounded_channel::<RouteManagerCommands<Ipv6Addr>>();
+        // Nothing spawned here so no need to send the shutdown signal.
+        let (_shutdown_tx, shutdown_rx) = tokio::sync::broadcast::channel(1);
+        RibManager::<Ipv6Addr>::new(rp_rx, shutdown_rx).unwrap()
+    }
+
+    #[test]
+    fn test_manager_process_single() {
+        let mut rib_manager = test_manager();
+
+        // Manually drive the manager instead of calling run to not deal with async in tests.
+        assert_eq!(
+            rib_manager.handle_update(RouteUpdate::Announce(test_announce(vec![65536]))),
+            Ok(())
+        );
+
+        let addr = Ipv6Addr::from_str("2001:db8::").unwrap();
+        let prefixlen: u32 = 32;
+
+        let lookup_result = rib_manager.lookup_path_exact(addr, prefixlen).unwrap();
+        assert_eq!(lookup_result.paths.len(), 1);
+        let path_result = lookup_result.paths.get("Some peer").unwrap();
+        assert_eq!(path_result.nexthop, test_nexthop().octets().to_vec());
+    }
+
+    #[test]
+    fn test_manager_reannounce_refreshes_as_path() {
+        let mut rib_manager = test_manager();
+        for as_path in [vec![65536], vec![65536, 65537]] {
+            assert_eq!(
+                rib_manager.handle_update(RouteUpdate::Announce(test_announce(as_path))),
+                Ok(())
+            );
+        }
+
+        let addr = Ipv6Addr::from_str("2001:db8::").unwrap();
+        let lookup_result = rib_manager.lookup_path_exact(addr, 32).unwrap();
+        assert_eq!(lookup_result.paths.len(), 1);
+        let path_result = lookup_result.paths.get("Some peer").unwrap();
+        assert_eq!(path_result.as_path, vec![65536, 65537]);
+    }
+
+    #[test]
+    fn test_path_equality_and_preference() {
+        let base = Path {
+            nexthop: vec![],
+            peer_name: "a".to_string(),
+            local_pref: 100,
+            med: 0,
+            as_path: vec![1, 2],
+            path_attributes: vec![],
+        };
+        let mut longer = base.clone();
+        longer.as_path.push(3);
+
+        assert!(base == base.clone());
+        assert!(base != longer);
+        assert!(base.is_better_than(&longer));
+        assert!(!longer.is_better_than(&base));
+        assert!(!base.is_better_than(&base));
+    }
+}
diff --git a/bgpd/src/server/route_server.rs b/bgpd/src/server/route_server.rs
new file mode 100644
index 0000000..9bc1b5f
--- /dev/null
+++ b/bgpd/src/server/route_server.rs
@@ -0,0 +1,309 @@
+// Copyright 2021 Rayhaan Jaufeerally.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::bgp_packet::constants::AddressFamilyIdentifier;
+use crate::server::peer::PeerCommands;
+use crate::server::rib_manager;
+use crate::server::rib_manager::RibSnapshot;
+use crate::server::rib_manager::RouteManagerCommands;
+use crate::server::route_server::route_server::route_service_server::RouteService;
+use crate::server::route_server::route_server::AddressFamily;
+use crate::server::route_server::route_server::DumpPathsRequest;
+use crate::server::route_server::route_server::DumpPathsResponse;
+use crate::server::route_server::route_server::Path;
+use crate::server::route_server::route_server::PathSet;
+use crate::server::route_server::route_server::Prefix;
+use crate::server::route_server::route_server::StreamPathsRequest;
+use log::warn;
+use std::collections::HashMap;
+use std::net::Ipv4Addr;
+use std::net::Ipv6Addr;
+use tokio::sync::broadcast;
+use tokio::sync::mpsc;
+use tokio::sync::mpsc::UnboundedSender;
+use tokio::sync::oneshot;
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::Response;
+use tonic::Status;
+
+pub mod route_server {
+    tonic::include_proto!("bgpd.grpc");
+}
+
+/// gRPC front end that exposes the per-address-family RIB managers.
+pub struct RouteServer {
+    pub ip4_manager: UnboundedSender<RouteManagerCommands<Ipv4Addr>>,
+    pub ip6_manager: UnboundedSender<RouteManagerCommands<Ipv6Addr>>,
+
+    pub peer_state_machines: HashMap<String, UnboundedSender<PeerCommands>>,
+}
+
+impl RouteServer {
+    /// Asks `manager` to dump its current routes into `dump_tx` and returns
+    /// the broadcast receiver on which subsequent updates are streamed.
+    async fn get_streaming_receiver<A>(
+        &self,
+        manager: UnboundedSender<RouteManagerCommands<A>>,
+        // dump_tx is used to receive the current state before streaming starts.
+        dump_tx: UnboundedSender<(u64, rib_manager::PathSet<A>)>,
+    ) -> Result<broadcast::Receiver<(u64, rib_manager::PathSet<A>)>, Status> {
+        let (stream_tx, stream_rx) =
+            oneshot::channel::<broadcast::Receiver<(u64, rib_manager::PathSet<A>)>>();
+        if let Err(e) = manager.send(RouteManagerCommands::StreamRib(dump_tx, stream_tx)) {
+            warn!("Failed to send StreamRib command to route manager: {}", e);
+            return Err(tonic::Status::internal(
+                "failed to communicate with route manager".to_owned(),
+            ));
+        }
+
+        stream_rx
+            .await
+            .map_err(|e| tonic::Status::internal(e.to_string()))
+    }
+
+    /// Converts a rib_manager::PathSet into the proto format PathSet using the
+    /// appropriate address family.
+    fn transform_pathset<A>(
+        mgr_ps: (u64, rib_manager::PathSet<A>),
+        address_family: i32,
+    ) -> PathSet {
+        let (epoch, pathset) = mgr_ps;
+        PathSet {
+            epoch,
+            prefix: Some(Prefix {
+                ip_prefix: pathset.nlri.prefix,
+                prefix_len: pathset.nlri.prefixlen.into(),
+                address_family,
+            }),
+            paths: pathset
+                .paths
+                .into_iter()
+                .map(|(_, path)| Path {
+                    as_path: path.as_path,
+                    local_pref: path.local_pref,
+                    med: path.med,
+                    nexthop: path.nexthop,
+                    peer_name: path.peer_name,
+                })
+                .collect(),
+        }
+    }
+
+    /// Requests a RIB snapshot from `manager` and converts it into a
+    /// DumpPathsResponse whose prefixes are tagged with `address_family`.
+    async fn dump_family<A>(
+        manager: &UnboundedSender<RouteManagerCommands<A>>,
+        address_family: i32,
+    ) -> Result<tonic::Response<DumpPathsResponse>, Status> {
+        let (tx, rx) = oneshot::channel::<RibSnapshot<A>>();
+        if let Err(e) = manager.send(RouteManagerCommands::DumpRib(tx)) {
+            warn!("Failed to send DumpRib command to route manager: {}", e);
+            return Err(tonic::Status::internal(
+                "failed to communicate with route manager",
+            ));
+        }
+        match rx.await {
+            Ok(snapshot) => {
+                let epoch = snapshot.epoch;
+                let path_sets = snapshot
+                    .routes
+                    .into_iter()
+                    .map(|pathset| Self::transform_pathset((epoch, pathset), address_family))
+                    .collect();
+                Ok(tonic::Response::new(DumpPathsResponse { epoch, path_sets }))
+            }
+            Err(e) => {
+                warn!("Failed to get response from route manager: {}", e);
+                Err(tonic::Status::internal(
+                    "failed to get response from route manager",
+                ))
+            }
+        }
+    }
+
+    /// Starts streaming the RIB of `manager`: first the dump of the current
+    /// routes, then live updates, all tagged with `address_family`.
+    ///
+    /// Both address families share this one code path, so the dump and the
+    /// live updates always carry the same family tag.
+    async fn stream_family<A>(
+        &self,
+        manager: UnboundedSender<RouteManagerCommands<A>>,
+        address_family: i32,
+    ) -> Result<Response<ReceiverStream<Result<PathSet, Status>>>, Status>
+    where
+        A: Clone + Send + 'static,
+    {
+        let (dump_tx, mut dump_rx) = mpsc::unbounded_channel();
+        let mut receiver = self.get_streaming_receiver::<A>(manager, dump_tx).await?;
+
+        let (tx, rx) = mpsc::channel(10_000);
+        // Spawn a task for receiving values from the manager and send them to the peer.
+        tokio::spawn(async move {
+            // Consume the dump before moving to the streamed paths.
+            while let Some(next) = dump_rx.recv().await {
+                let pathset = RouteServer::transform_pathset(next, address_family);
+                if let Err(e) = tx.send(Ok(pathset)).await {
+                    warn!("Failed to send path to peer: {}", e);
+                    return;
+                }
+            }
+
+            loop {
+                match receiver.recv().await {
+                    Ok(route) => {
+                        let pathset = RouteServer::transform_pathset(route, address_family);
+                        if let Err(e) = tx.send(Ok(pathset)).await {
+                            warn!("Failed to send streaming route to peer: {}", e);
+                            return;
+                        }
+                    }
+                    Err(e) => {
+                        warn!("Failed to get next streaming route from manager: {}", e);
+                        let _ = tx
+                            .send(Err(tonic::Status::internal(format!(
+                                "Failed to get next route from manager: {}",
+                                e
+                            ))))
+                            .await;
+                        return;
+                    }
+                }
+            }
+        });
+
+        Ok(Response::new(ReceiverStream::new(rx)))
+    }
+}
+
+#[tonic::async_trait]
+impl RouteService for RouteServer {
+    /// Returns a snapshot of the RIB of the requested address family.
+    async fn dump_paths(
+        &self,
+        request: tonic::Request<DumpPathsRequest>,
+    ) -> Result<tonic::Response<DumpPathsResponse>, Status> {
+        // Checked conversion: a plain `as u16` would wrap out-of-range values
+        // (e.g. 65537) onto a valid address family.
+        let raw_afi = u16::try_from(request.get_ref().address_family)
+            .map_err(|e| tonic::Status::internal(e.to_string()))?;
+        let afi = AddressFamilyIdentifier::try_from(raw_afi)
+            .map_err(|e| tonic::Status::internal(e.to_string()))?;
+        match afi {
+            AddressFamilyIdentifier::Ipv4 => {
+                Self::dump_family(&self.ip4_manager, AddressFamily::IPv4.into()).await
+            }
+            AddressFamilyIdentifier::Ipv6 => {
+                Self::dump_family(&self.ip6_manager, AddressFamily::IPv6.into()).await
+            }
+        }
+    }
+
+    type StreamPathsStream = ReceiverStream<Result<PathSet, Status>>;
+
+    /// Streams the RIB of the requested address family: the current routes
+    /// first, followed by live updates.
+    async fn stream_paths(
+        &self,
+        request: tonic::Request<StreamPathsRequest>,
+    ) -> Result<Response<Self::StreamPathsStream>, Status> {
+        // 1 and 2 are the raw address family values for IPv4 and IPv6.
+        match request.get_ref().address_family {
+            1 => {
+                self.stream_family::<Ipv4Addr>(
+                    self.ip4_manager.clone(),
+                    AddressFamily::IPv4.into(),
+                )
+                .await
+            }
+            2 => {
+                self.stream_family::<Ipv6Addr>(
+                    self.ip6_manager.clone(),
+                    AddressFamily::IPv6.into(),
+                )
+                .await
+            }
+            _ => Err(tonic::Status::internal("Unknown address family")),
+        }
+    }
+}
diff --git a/bgpd/src/streamer_cli/main.rs b/bgpd/src/streamer_cli/main.rs
new file mode 100644
index 0000000..e8a3475
--- /dev/null
+++ b/bgpd/src/streamer_cli/main.rs
@@ -0,0 +1,164 @@
+// Copyright 2021 Rayhaan Jaufeerally.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +use bgpd::bgp_packet::constants::AddressFamilyIdentifier; +use bgpd::bgp_packet::nlri::NLRI; +use bgpd::server::route_server::route_server::route_service_client::RouteServiceClient; +use bgpd::server::route_server::route_server::DumpPathsRequest; +use bgpd::server::route_server::route_server::PathSet; +use bgpd::server::route_server::route_server::StreamPathsRequest; +use clap::Parser; +use std::process::exit; +use std::time::Duration; +use tokio::task::JoinHandle; +use tonic::transport::Endpoint; +use tracing::{info, warn}; + +extern crate clap; + +#[derive(clap::Parser)] +#[clap( + author = "Rayhaan Jaufeerally ", + version = "0.1", + about = "A program to install routes from BGP into the Linux control plane" +)] +struct Cli { + server_address: String, +} + +#[tokio::main] +async fn main() -> Result<(), String> { + let subscriber = tracing_subscriber::fmt(); + + match subscriber.try_init() { + Ok(()) => {} + Err(e) => { + eprintln!("Failed to initialize logger: {:?}", e); + exit(1); + } + } + + let cli = Cli::parse(); + + info!("Starting client"); + let grpc_endpoint = cli.server_address; + let endpoint = Endpoint::from_shared(grpc_endpoint) + .map_err(|e| e.to_string())? + .keep_alive_timeout(Duration::from_secs(10)); + let mut client = RouteServiceClient::connect(endpoint) + .await + .map_err(|e| e.to_string())?; + + info!("Connected"); + + // 1. First subscribe to the route feed and put these into an unbounded channel. + let (stream_tx, mut stream_rx) = tokio::sync::mpsc::unbounded_channel::(); + let request = StreamPathsRequest { + address_family: 2_i32, + }; + + let mut client_copy = client.clone(); + let _recv_handle: JoinHandle> = tokio::spawn(async move { + let mut rpc_stream = client_copy + .stream_paths(request) + .await + .map_err(|e| e.to_string())? + .into_inner(); + while let Some(route) = rpc_stream.message().await.map_err(|e| e.to_string())? 
{ + stream_tx.send(route).map_err(|e| e.to_string())?; + } + Err("Stream closed".to_string()) + }); + + // 2. Dump the whole RIB + let dump_request = DumpPathsRequest { + address_family: 2_i32, + }; + let dump_response = client.dump_paths(dump_request).await.unwrap().into_inner(); + let dump_epoch = dump_response.epoch; + + info!("Dump epoch was: {}", dump_epoch); + + let overrun_slot: Option; + loop { + let item = stream_rx.recv().await; + match &item { + Some(pathset) => { + if pathset.epoch >= dump_epoch { + overrun_slot = Some(pathset.clone()); + break; + } else { + info!("Skipping already-dumped epoch: {}", pathset.epoch); + } + } + None => { + return Err("Stream unexpectedly closed".to_owned()); + } + } + } + + // Replay all the pathsets from dump_response + for pathset in dump_response.path_sets { + info!("Got pathset: {:?}", pathset); + // Parse an NLRI from the pathset + if let Some(prefix) = &pathset.prefix { + let nlri = NLRI::from_bytes( + AddressFamilyIdentifier::Ipv6, + prefix.ip_prefix.clone(), + prefix.prefix_len as u8, + ) + .unwrap(); + info!("Parsed NLRI: {}", nlri.to_string()); + } + } + + // Replay the overrun slot + if let Some(pathset) = overrun_slot { + if let Some(prefix) = &pathset.prefix { + let nlri = NLRI::from_bytes( + AddressFamilyIdentifier::Ipv6, + prefix.ip_prefix.clone(), + prefix.prefix_len as u8, + ) + .unwrap(); + info!("Parsed NLRI: {}", nlri.to_string()); + } + } + + loop { + let item = stream_rx.recv().await; + + match &item { + Some(pathset) => { + info!("Got pathset: {:?}", pathset); + // Parse an NLRI from the pathset + if let Some(prefix) = &pathset.prefix { + let nlri = NLRI::from_bytes( + AddressFamilyIdentifier::Ipv6, + prefix.ip_prefix.clone(), + prefix.prefix_len as u8, + ) + .unwrap(); + info!("Parsed NLRI: {}", nlri.to_string()); + } + } + None => { + warn!("stream_rx closed"); + break; + } + } + } + + Err("Program exited unexpectedly.".to_owned()) +} diff --git a/bgpd_overview.drawio b/bgpd_overview.drawio new 
file mode 100644 index 0000000..48b5255 --- /dev/null +++ b/bgpd_overview.drawio @@ -0,0 +1 @@ +UzV2zq1wL0osyPDNT0nNUTV2VTV2LsrPL4GwciucU3NyVI0MMlNUjV1UjYwMgFjVyA2HrCFY1qAgsSg1rwSLBiADYTaQg2Y1AA== \ No newline at end of file diff --git a/configs/pr01.home.json b/configs/pr01.home.json new file mode 100644 index 0000000..fdde953 --- /dev/null +++ b/configs/pr01.home.json @@ -0,0 +1,85 @@ +{ + "identifier": "193.36.105.1", + "asn": 210036, + "hold_time": 180, + "listen_addrs": [ + "[::]:179" + ], + "peers": [ + { + "name": "iway_rs1_ipv6", + "ip": "2001:8e0:9ff:2000::1", + "asn": 8758, + "afi": 2, + "safi": 1, + "local_pref": 100, + "announcements": [ + { + "prefix": "2a0d:d740:105::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + } + ] + }, + { + "name": "iway_rs2_ipv6", + "ip": "2001:8e0:ffff:3::42", + "asn": 8758, + "afi": 2, + "safi": 1, + "local_pref": 200, + "announcements": [ + { + "prefix": "2a0d:d740::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "2a0d:d740:105::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + } + ] + }, + { + "name": "iway_rs3_ipv6", + "ip": "2001:8e0:ffff:3::72", + "asn": 8758, + "afi": 2, + "safi": 1, + "local_pref": 200, + "announcements": [ + { + "prefix": "2a0d:d740::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "2a0d:d740:105::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + } + ] + }, + { + "name": "iway_rs1_ipv4", + "ip": "::ffff:83.150.40.2", + "asn": 8758, + "afi": 1, + "safi": 1, + "local_pref": 100, + "announcements": [ + { + "prefix": "193.36.104.0/24", + "nexthop": "83.150.43.65", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "193.36.105.0/24", + "nexthop": "83.150.43.65", + "large_communities": ["210036:10:1"] + } + ] + } + ] +} diff --git 
a/configs/pr01.man.json b/configs/pr01.man.json new file mode 100644 index 0000000..1d088df --- /dev/null +++ b/configs/pr01.man.json @@ -0,0 +1,90 @@ +{ + "identifier": "193.36.105.1", + "asn": 210036, + "hold_time": 180, + "listen_addrs": [ + "[::]:179" + ], + "peers": [ + { + "name": "iway_rs1_ipv6", + "ip": "2001:8e0:9ff:2000::1", + "asn": 8758, + "afi": 2, + "safi": 1, + "local_pref": 100, + "announcements": [ + { + "prefix": "2a0d:d740::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "2a0d:d740:105::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + } + ] + }, + { + "name": "iway_rs2_ipv6", + "ip": "2001:8e0:ffff:3::42", + "asn": 8758, + "afi": 2, + "safi": 1, + "local_pref": 200, + "announcements": [ + { + "prefix": "2a0d:d740::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "2a0d:d740:105::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + } + ] + }, + { + "name": "iway_rs3_ipv6", + "ip": "2001:8e0:ffff:3::72", + "asn": 8758, + "afi": 2, + "safi": 1, + "local_pref": 200, + "announcements": [ + { + "prefix": "2a0d:d740::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "2a0d:d740:105::/48", + "nexthop": "2001:8e0:9ff:2000::5396:2b41", + "large_communities": ["210036:10:1"] + } + ] + }, + { + "name": "iway_rs1_ipv4", + "ip": "::ffff:83.150.40.2", + "asn": 8758, + "afi": 1, + "safi": 1, + "local_pref": 100, + "announcements": [ + { + "prefix": "193.36.104.0/24", + "nexthop": "83.150.43.65", + "large_communities": ["210036:10:1"] + }, + { + "prefix": "193.36.105.0/24", + "nexthop": "83.150.43.65", + "large_communities": ["210036:10:1"] + } + ] + } + ] +} diff --git a/configs/test.json b/configs/test.json new file mode 100644 index 0000000..2441f73 --- /dev/null +++ b/configs/test.json @@ -0,0 
+1,25 @@ +{ + "identifier": "193.36.105.196", + "asn": 210036, + "hold_time": 180, + "http_addr": "[::]:9179", + "listen_addrs": [ + "[::]:1790" + ], + "peers": [ + { + "name": "sample_peer", + "ip": "2a0d:d740:105:0:af9d:7b05:a881:2e91", + "asn": 210036, + "afi": "Ipv4", + "safi": "Unicast", + "local_pref": 100, + "announcements": [ + { + "prefix": "2001:db8::/32", + "nexthop": "2a0d:d740:105:0:af9d:7b05:a881:2e91" + } + ] + } + ] +} diff --git a/netlink/.gitignore b/netlink/.gitignore new file mode 100644 index 0000000..ff47c2d --- /dev/null +++ b/netlink/.gitignore @@ -0,0 +1,11 @@ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk diff --git a/netlink/Cargo.toml b/netlink/Cargo.toml new file mode 100644 index 0000000..fb4d140 --- /dev/null +++ b/netlink/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "netlink" +version = "0.1.0" +authors = ["rayhaan"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libc = "0.2" +bytes = "1" +byteorder = "1.4.3" +log = "0.4" +hex = "0.4.3" +neli = "0.6.2" \ No newline at end of file diff --git a/netlink/README.md b/netlink/README.md new file mode 100644 index 0000000..fc10db2 --- /dev/null +++ b/netlink/README.md @@ -0,0 +1,33 @@ +# Netlink + +This project was created to have an easy way to manipulate routes in the Linux kernel using the Netlink protocol. + +There are some other libraries which provide similar functionality, but which were not offering the exact API which was desired to quickly modify routing state from control plane routing protocol daemons. 
+ +The API that this crate provides is (currently) limited to mutating routes, using the following function: + +```rust +// Create a handle which opens up a socket to the kernel. +// `NetlinkInterface::new` is an `unsafe fn`, and `mutate_route` needs `&mut self`. +let mut nl_iface = unsafe { NetlinkInterface::new() }.unwrap(); + +// Modify a route +let af: u8 = libc::AF_INET6 as u8; // Address family of the route; written as-is into the rtmsg header. +let dst_prefix = vec![0x20, 0x01, 0xdb, 0x8]; // 2001:db8::. +let dst_prefix_len = 32; // The prefix length is 32 bits. +let gateway_addr = vec![ // Nexthop / gateway to send packets to. + 0x2a, 0x0d, 0xd7, 0x40, 0x01, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, +]; +let rt_table = 200; // Install this route into table 200. + +nl_iface.mutate_route( + true, // Add a route, false would be for removing a route. + af, + dst_prefix, + dst_prefix_len, + gateway_addr, + Some(rt_table)).unwrap(); + +``` + + Internally, `RouteMessage` is used to represent an [rtmsg](https://man7.org/linux/man-pages/man7/rtnetlink.7.html) sent to the kernel, together with the set of `RouteAttribute`s attached to that particular `rtmsg`. \ No newline at end of file diff --git a/netlink/netlink.pcap b/netlink/netlink.pcap new file mode 100644 index 0000000..4ceec6c Binary files /dev/null and b/netlink/netlink.pcap differ diff --git a/netlink/src/constants.rs b/netlink/src/constants.rs new file mode 100644 index 0000000..4813140 --- /dev/null +++ b/netlink/src/constants.rs @@ -0,0 +1,172 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +// This is direcly from https://docs.rs/libc/0.2.98/src/libc/unix/linux_like/linux/mod.rs.html#2449 +// because when we build with musl libc some of these values are missing. + +// linux/rtnetlink.h +pub const TCA_UNSPEC: libc::c_ushort = 0; +pub const TCA_KIND: libc::c_ushort = 1; +pub const TCA_OPTIONS: libc::c_ushort = 2; +pub const TCA_STATS: libc::c_ushort = 3; +pub const TCA_XSTATS: libc::c_ushort = 4; +pub const TCA_RATE: libc::c_ushort = 5; +pub const TCA_FCNT: libc::c_ushort = 6; +pub const TCA_STATS2: libc::c_ushort = 7; +pub const TCA_STAB: libc::c_ushort = 8; + +pub const RTM_NEWLINK: u16 = 16; +pub const RTM_DELLINK: u16 = 17; +pub const RTM_GETLINK: u16 = 18; +pub const RTM_SETLINK: u16 = 19; +pub const RTM_NEWADDR: u16 = 20; +pub const RTM_DELADDR: u16 = 21; +pub const RTM_GETADDR: u16 = 22; +pub const RTM_NEWROUTE: u16 = 24; +pub const RTM_DELROUTE: u16 = 25; +pub const RTM_GETROUTE: u16 = 26; +pub const RTM_NEWNEIGH: u16 = 28; +pub const RTM_DELNEIGH: u16 = 29; +pub const RTM_GETNEIGH: u16 = 30; +pub const RTM_NEWRULE: u16 = 32; +pub const RTM_DELRULE: u16 = 33; +pub const RTM_GETRULE: u16 = 34; +pub const RTM_NEWQDISC: u16 = 36; +pub const RTM_DELQDISC: u16 = 37; +pub const RTM_GETQDISC: u16 = 38; +pub const RTM_NEWTCLASS: u16 = 40; +pub const RTM_DELTCLASS: u16 = 41; +pub const RTM_GETTCLASS: u16 = 42; +pub const RTM_NEWTFILTER: u16 = 44; +pub const RTM_DELTFILTER: u16 = 45; +pub const RTM_GETTFILTER: u16 = 46; +pub const RTM_NEWACTION: u16 = 48; +pub const RTM_DELACTION: u16 = 49; +pub const RTM_GETACTION: u16 = 50; +pub const RTM_NEWPREFIX: u16 = 52; +pub const RTM_GETMULTICAST: u16 = 58; +pub const RTM_GETANYCAST: u16 = 62; +pub const RTM_NEWNEIGHTBL: u16 = 64; +pub const RTM_GETNEIGHTBL: u16 = 66; +pub const RTM_SETNEIGHTBL: u16 = 67; +pub const RTM_NEWNDUSEROPT: u16 = 68; +pub const RTM_NEWADDRLABEL: u16 = 72; +pub const 
RTM_DELADDRLABEL: u16 = 73; +pub const RTM_GETADDRLABEL: u16 = 74; +pub const RTM_GETDCB: u16 = 78; +pub const RTM_SETDCB: u16 = 79; +pub const RTM_NEWNETCONF: u16 = 80; +pub const RTM_GETNETCONF: u16 = 82; +pub const RTM_NEWMDB: u16 = 84; +pub const RTM_DELMDB: u16 = 85; +pub const RTM_GETMDB: u16 = 86; +pub const RTM_NEWNSID: u16 = 88; +pub const RTM_DELNSID: u16 = 89; +pub const RTM_GETNSID: u16 = 90; + +pub const RTM_F_NOTIFY: libc::c_uint = 0x100; +pub const RTM_F_CLONED: libc::c_uint = 0x200; +pub const RTM_F_EQUALIZE: libc::c_uint = 0x400; +pub const RTM_F_PREFIX: libc::c_uint = 0x800; + +pub const RTA_UNSPEC: libc::c_ushort = 0; +pub const RTA_DST: libc::c_ushort = 1; +pub const RTA_SRC: libc::c_ushort = 2; +pub const RTA_IIF: libc::c_ushort = 3; +pub const RTA_OIF: libc::c_ushort = 4; +pub const RTA_GATEWAY: libc::c_ushort = 5; +pub const RTA_PRIORITY: libc::c_ushort = 6; +pub const RTA_PREFSRC: libc::c_ushort = 7; +pub const RTA_METRICS: libc::c_ushort = 8; +pub const RTA_MULTIPATH: libc::c_ushort = 9; +pub const RTA_PROTOINFO: libc::c_ushort = 10; // No longer used +pub const RTA_FLOW: libc::c_ushort = 11; +pub const RTA_CACHEINFO: libc::c_ushort = 12; +pub const RTA_SESSION: libc::c_ushort = 13; // No longer used +pub const RTA_MP_ALGO: libc::c_ushort = 14; // No longer used +pub const RTA_TABLE: libc::c_ushort = 15; +pub const RTA_MARK: libc::c_ushort = 16; +pub const RTA_MFC_STATS: libc::c_ushort = 17; + +pub const RTN_UNSPEC: libc::c_uchar = 0; +pub const RTN_UNICAST: libc::c_uchar = 1; +pub const RTN_LOCAL: libc::c_uchar = 2; +pub const RTN_BROADCAST: libc::c_uchar = 3; +pub const RTN_ANYCAST: libc::c_uchar = 4; +pub const RTN_MULTICAST: libc::c_uchar = 5; +pub const RTN_BLACKHOLE: libc::c_uchar = 6; +pub const RTN_UNREACHABLE: libc::c_uchar = 7; +pub const RTN_PROHIBIT: libc::c_uchar = 8; +pub const RTN_THROW: libc::c_uchar = 9; +pub const RTN_NAT: libc::c_uchar = 10; +pub const RTN_XRESOLVE: libc::c_uchar = 11; + +pub const RTPROT_UNSPEC: 
libc::c_uchar = 0; +pub const RTPROT_REDIRECT: libc::c_uchar = 1; +pub const RTPROT_KERNEL: libc::c_uchar = 2; +pub const RTPROT_BOOT: libc::c_uchar = 3; +pub const RTPROT_STATIC: libc::c_uchar = 4; + +pub const RT_SCOPE_UNIVERSE: libc::c_uchar = 0; +pub const RT_SCOPE_SITE: libc::c_uchar = 200; +pub const RT_SCOPE_LINK: libc::c_uchar = 253; +pub const RT_SCOPE_HOST: libc::c_uchar = 254; +pub const RT_SCOPE_NOWHERE: libc::c_uchar = 255; + +pub const RT_TABLE_UNSPEC: libc::c_uchar = 0; +pub const RT_TABLE_COMPAT: libc::c_uchar = 252; +pub const RT_TABLE_DEFAULT: libc::c_uchar = 253; +pub const RT_TABLE_MAIN: libc::c_uchar = 254; +pub const RT_TABLE_LOCAL: libc::c_uchar = 255; + +pub const RTMSG_OVERRUN: u32 = libc::NLMSG_OVERRUN as u32; +pub const RTMSG_NEWDEVICE: u32 = 0x11; +pub const RTMSG_DELDEVICE: u32 = 0x12; +pub const RTMSG_NEWROUTE: u32 = 0x21; +pub const RTMSG_DELROUTE: u32 = 0x22; +pub const RTMSG_NEWRULE: u32 = 0x31; +pub const RTMSG_DELRULE: u32 = 0x32; +pub const RTMSG_CONTROL: u32 = 0x40; +pub const RTMSG_AR_FAILED: u32 = 0x51; + +pub const MAX_ADDR_LEN: usize = 7; +pub const ARPD_UPDATE: libc::c_ushort = 0x01; +pub const ARPD_LOOKUP: libc::c_ushort = 0x02; +pub const ARPD_FLUSH: libc::c_ushort = 0x03; +pub const ATF_MAGIC: libc::c_int = 0x80; + +// From https://docs.rs/libc/0.2.98/src/libc/unix/linux_like/linux/gnu/mod.rs.html#938 +// linux/rtnetlink.h +pub const TCA_PAD: libc::c_ushort = 9; +pub const TCA_DUMP_INVISIBLE: libc::c_ushort = 10; +pub const TCA_CHAIN: libc::c_ushort = 11; +pub const TCA_HW_OFFLOAD: libc::c_ushort = 12; + +pub const RTM_DELNETCONF: u16 = 81; +pub const RTM_NEWSTATS: u16 = 92; +pub const RTM_GETSTATS: u16 = 94; +pub const RTM_NEWCACHEREPORT: u16 = 96; + +pub const RTM_F_LOOKUP_TABLE: libc::c_uint = 0x1000; +pub const RTM_F_FIB_MATCH: libc::c_uint = 0x2000; + +pub const RTA_VIA: libc::c_ushort = 18; +pub const RTA_NEWDST: libc::c_ushort = 19; +pub const RTA_PREF: libc::c_ushort = 20; +pub const RTA_ENCAP_TYPE: 
libc::c_ushort = 21; +pub const RTA_ENCAP: libc::c_ushort = 22; +pub const RTA_EXPIRES: libc::c_ushort = 23; +pub const RTA_PAD: libc::c_ushort = 24; +pub const RTA_UID: libc::c_ushort = 25; +pub const RTA_TTL_PROPAGATE: libc::c_ushort = 26; diff --git a/netlink/src/lib.rs b/netlink/src/lib.rs new file mode 100644 index 0000000..090f4ec --- /dev/null +++ b/netlink/src/lib.rs @@ -0,0 +1,18 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod constants; +pub mod netlink_interface; +pub mod packet; +pub mod traits; diff --git a/netlink/src/main.rs b/netlink/src/main.rs new file mode 100644 index 0000000..8492c66 --- /dev/null +++ b/netlink/src/main.rs @@ -0,0 +1,109 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! This is just a small test program for testing the netlink integration. 
+ +use bytes::BytesMut; +use libc::c_void; +use netlink::packet::parse_netlink_message; +use netlink::packet::RouteAttribute; +use netlink::traits::NetlinkAttribute; +use netlink::traits::Serializable; + +use netlink::packet::NetlinkHeader; +use netlink::packet::RouteMessage; +use std::convert::TryInto; + +fn main() { + println!("Starting netlink dump!"); + + let nl_fd: libc::c_int; + unsafe { + // Establish a Netlink socket to the kernel. + nl_fd = libc::socket(libc::AF_NETLINK, libc::SOCK_RAW, libc::NETLINK_ROUTE); + if nl_fd < 0 { + println!("Failed to create netlink socket: {}", nl_fd); + std::process::exit(1); + } + let sockaddr = libc::sockaddr { + sa_family: libc::AF_NETLINK as u16, + sa_data: [0i8; 14], + }; + let bind_result = libc::bind( + nl_fd, + &sockaddr, + std::mem::size_of::().try_into().unwrap(), + ); + if bind_result < 0 { + println!("Failed to create netlink socket: {}", nl_fd); + std::process::exit(1); + } + } + + // Build a route dump message and send it to the kernel. 
+ let mut nl_hdr = NetlinkHeader { + nlmsg_type: libc::RTM_NEWROUTE, + nlmsg_flags: (libc::NLM_F_REQUEST) as u16, + nlmsg_seq: 0xcafe, + nlmsg_pid: 0, + nlmsg_len: 0, + }; + + println!("message type: {}", nl_hdr.nlmsg_type); + let rt_msg = RouteMessage { + af: libc::AF_INET6 as u8, + dst_len: 32, + ..Default::default() + }; + + let dst_attr = RouteAttribute::Dst(vec![0x20, 0x01, 0xdb, 0x8]); + let gateway_addr = RouteAttribute::Gateway(vec![ + 0x2a, 0x0d, 0xd7, 0x40, 0x1, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, + ]); + + let len = std::mem::size_of::() + + std::mem::size_of::() + + 4_usize + + dst_attr.payload_len() as usize + + 4_usize + + gateway_addr.payload_len() as usize; + nl_hdr.nlmsg_len = len as u32; + println!("Length of netlink message: {}", len); + + let mut buf = BytesMut::with_capacity(4096); + nl_hdr.to_wire(&mut buf).unwrap(); + rt_msg.to_wire(&mut buf).unwrap(); + dst_attr.to_wire(&mut buf).unwrap(); + gateway_addr.to_wire(&mut buf).unwrap(); + + unsafe { + let bytes_written = libc::write(nl_fd, buf.as_ptr() as *const c_void, buf.len()); + println!("bytes_written: {}", bytes_written); + } + + let mut resp = BytesMut::with_capacity(4096); + + unsafe { + let bytes_read = libc::read(nl_fd, resp.as_mut_ptr() as *mut c_void, 4096); + resp.set_len(bytes_read.try_into().unwrap()); + }; + + println!("Read bytes from netlink: {:?}", resp); + + while resp.len() > 3 { + let (header, response) = parse_netlink_message(&mut resp).unwrap(); + println!("Header: {:?} response: {:?}", header, response); + } +} diff --git a/netlink/src/netlink_interface.rs b/netlink/src/netlink_interface.rs new file mode 100644 index 0000000..1f62aa7 --- /dev/null +++ b/netlink/src/netlink_interface.rs @@ -0,0 +1,211 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::packet::parse_netlink_message; +use crate::packet::NetlinkHeader; +use crate::packet::NetlinkPayload; +use crate::packet::RouteAttribute; +use crate::packet::RouteMessage; +use crate::traits::NetlinkAttribute; +use crate::traits::Serializable; +use bytes::BytesMut; +use libc::c_void; +use log::info; +use std::convert::TryInto; +use std::fmt; +use std::fmt::Formatter; +use std::net::Ipv6Addr; + +pub struct NetlinkInterface { + nl_fd: libc::c_int, + seqno: u32, + buf: BytesMut, +} + +#[derive(Debug, Clone)] +pub struct NetlinkError { + reason: String, +} + +impl NetlinkError { + fn new(reason: String) -> NetlinkError { + NetlinkError { reason } + } +} + +impl fmt::Display for NetlinkError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + write!(f, "{}", self.reason) + } +} + +impl std::error::Error for NetlinkError {} + +impl NetlinkInterface { + /// # Safety + /// This function is unsafe as it manually creates a netlink socket with the socket + /// system call. 
+ pub unsafe fn new() -> Result> { + let nl_fd = libc::socket(libc::AF_NETLINK, libc::SOCK_RAW, libc::NETLINK_ROUTE); + if nl_fd < 0 { + return Err(Box::new(NetlinkError::new(format!( + "Error creating netlink socket: {}", + nl_fd + )))); + } + let sockaddr = libc::sockaddr { + sa_family: libc::AF_NETLINK as u16, + sa_data: [0i8; 14], + }; + let bind_result = libc::bind( + nl_fd, + &sockaddr, + std::mem::size_of::().try_into()?, + ); + if bind_result < 0 { + return Err(Box::new(NetlinkError::new(format!( + "Failed to bind to netlink socket: {}", + bind_result + )))); + } + Ok(NetlinkInterface { + nl_fd, + seqno: 0, + buf: BytesMut::with_capacity(4096), + }) + } + + pub fn mutate_route( + &mut self, + add: bool, + address_family: u8, + dst_prefix: Vec, + prefix_len: u8, + gateway: Vec, + table: Option, + ) -> Result<(), Box> { + info!( + "Mutate route: {:x?}/{prefix_len} via {:x?}", + dst_prefix, gateway + ); + // XXX: Fix this we should reuse the buffer instead of allocating a new one + // each time. But there's some bug with how the size is being manipulated + // below that causes the buffer to get exhausted. + self.buf = BytesMut::with_capacity(4096); + + let msg_type = match add { + true => libc::RTM_NEWROUTE, + false => libc::RTM_DELROUTE, + }; + self.seqno += 1; + let mut nl_hdr = NetlinkHeader { + nlmsg_type: msg_type, + nlmsg_flags: (libc::NLM_F_REQUEST | libc::NLM_F_ACK) as u16, + nlmsg_seq: self.seqno, + nlmsg_pid: 0, + nlmsg_len: 0, // Filled in later. 
+ }; + + let rt_msg = RouteMessage { + af: address_family, + dst_len: prefix_len, + ..Default::default() + }; + + let dst_attr = RouteAttribute::Dst(dst_prefix); + let gateway_addr = RouteAttribute::Gateway(gateway); + + nl_hdr.nlmsg_len = std::mem::size_of::() as u32 + + std::mem::size_of::() as u32 + + 4 // Attribute header + + dst_attr.payload_len() as u32 + + 4 // Attribute header + + gateway_addr.payload_len() as u32; + + let mut table_attr: Option = None; + if let Some(table_id) = table { + table_attr = Some(RouteAttribute::Table(table_id)); + nl_hdr.nlmsg_len += 4 + table_attr.as_ref().unwrap().payload_len() as u32; + } + + // self.buf.clear(); + nl_hdr.to_wire(&mut self.buf)?; + rt_msg.to_wire(&mut self.buf)?; + dst_attr.to_wire(&mut self.buf)?; + gateway_addr.to_wire(&mut self.buf)?; + if let Some(table_attr) = table_attr { + table_attr.to_wire(&mut self.buf)?; + } + + unsafe { + let bytes_written = libc::write( + self.nl_fd, + self.buf.as_ptr() as *const c_void, + self.buf.len(), + ); + if bytes_written < 0 { + return Err(Box::new(NetlinkError::new(format!( + "Failed to write to netlink: {}", + bytes_written + )))); + } + if bytes_written != self.buf.len() as isize { + return Err(Box::new(NetlinkError::new( + "Failed to write full message to netlink".to_string(), + ))); + } + } + + // Read the response back from netlink, should be a ACK or Error. 
+ self.buf.clear(); + + unsafe { + let bytes_read = libc::read(self.nl_fd, self.buf.as_mut_ptr() as *mut c_void, 4906); + if bytes_read < 0 { + return Err(Box::new(NetlinkError::new(format!( + "Failed to read from netlink: {}", + bytes_read + )))); + } + println!( + "bytes_read: {} (usz) {}, cap: {}", + bytes_read, + (bytes_read as usize), + self.buf.capacity() + ); + + // let read_view = self.buf.clone(); + self.buf.set_len(bytes_read as usize); + + let (_header, response) = parse_netlink_message(&mut self.buf)?; + match response { + NetlinkPayload::Error(e) => { + if e.error == 0 { + // Successful ACK of the route add. + Ok(()) + } else { + Err(Box::new(NetlinkError::new(format!( + "Got netlink error: {:?}", + e + )))) + } + } + _ => Err(Box::new(NetlinkError::new(format!( + "Got unexpected netlink message: {:?}", + response + )))), + } + } + } +} diff --git a/netlink/src/packet.rs b/netlink/src/packet.rs new file mode 100644 index 0000000..b352605 --- /dev/null +++ b/netlink/src/packet.rs @@ -0,0 +1,614 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::constants; +use crate::traits::NetlinkAttribute; +use crate::traits::Serializable; +use byteorder::ByteOrder; +use byteorder::NativeEndian; +use byteorder::ReadBytesExt; +use byteorder::WriteBytesExt; +use bytes::Buf; +use bytes::BufMut; +use bytes::BytesMut; +use log::info; +use std::convert::TryInto; +use std::fmt::Display; +use std::fmt::Formatter; +use std::io::Read; +use std::io::Write; + +// XXX: Hack to make libc:: constants the right type. +const CONST_NETLINK_ROUTE: u16 = libc::NETLINK_ROUTE as u16; +const CONST_NETLINK_NOOP: u16 = libc::NLMSG_NOOP as u16; +const CONST_NETLINK_ERR: u16 = libc::NLMSG_ERROR as u16; + +macro_rules! check_vec_len { + ($payload:expr, $len:expr) => { + if $payload.len() != $len { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "expected {} bytes of payload, instead got {}", + $len, + $payload.len() + ), + )); + } + }; +} + +#[derive(Debug)] +pub enum NetlinkPayload { + Route(RouteMessage, Vec), + Error(NetlinkError), + Noop(), + Done(), +} + +pub fn parse_netlink_message( + buf: &mut BytesMut, +) -> Result<(NetlinkHeader, NetlinkPayload), std::io::Error> { + let header = NetlinkHeader::from_wire(buf)?; + let payload_len = header.nlmsg_len - std::mem::size_of::() as u32; + if payload_len > buf.len().try_into().unwrap() { + return Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!( + "Requested payload_len > buffer len: {} > {}", + payload_len, + buf.len() + ), + )); + } + info!( + "Calling split_to with payload_len={}, buf.len()={}", + payload_len, + buf.len() + ); + let payload: &mut BytesMut = &mut buf.split_to(payload_len as usize); + + match header.nlmsg_type { + CONST_NETLINK_ERR => { + let error = NetlinkError::from_wire(payload)?; + Ok((header, NetlinkPayload::Error(error))) + } + CONST_NETLINK_NOOP => Ok((header, NetlinkPayload::Noop())), + CONST_NETLINK_ROUTE => { + let (rt_msg, attrs) = take_route_message(payload)?; + Ok((header, 
NetlinkPayload::Route(rt_msg, attrs))) + } + libc::RTM_NEWROUTE | libc::RTM_GETROUTE | libc::RTM_DELROUTE => { + let (rt_msg, attrs) = take_route_message(payload)?; + Ok((header, NetlinkPayload::Route(rt_msg, attrs))) + } + unknown => Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!("Unknown netlink message type: {}", unknown), + )), + } +} + +/// take_route_messaage attemts to parse a route message and attributes from the +/// provided buffer. It expects the header is already removed and the buffer is +/// trimmed of any padding. +pub fn take_route_message( + buf: &mut BytesMut, +) -> Result<(RouteMessage, Vec), std::io::Error> { + let rt_msg = RouteMessage::from_wire(buf)?; + let mut attributes = Vec::::new(); + + while buf.len() > 3 { + let attr = RouteAttribute::from_wire(buf)?; + attributes.push(attr); + } + + Ok((rt_msg, attributes)) +} + +// NetlinkHeader is equivalent to nlmsghdr from the kernel. +// https://man7.org/linux/man-pages/man7/netlink.7.html +#[repr(C)] +#[derive(Debug)] +pub struct NetlinkHeader { + pub nlmsg_len: u32, + pub nlmsg_type: u16, + pub nlmsg_flags: u16, + pub nlmsg_seq: u32, + pub nlmsg_pid: u32, +} + +impl Serializable for NetlinkHeader { + fn to_wire(&self, buf: &mut BytesMut) -> Result<(), std::io::Error> { + let mut writer = buf.writer(); + writer.write_u32::(self.nlmsg_len)?; + writer.write_u16::(self.nlmsg_type)?; + writer.write_u16::(self.nlmsg_flags)?; + writer.write_u32::(self.nlmsg_seq)?; + writer.write_u32::(self.nlmsg_pid)?; + + Ok(()) + } + fn from_wire(buf: &mut BytesMut) -> Result { + let mut reader = buf.reader(); + let nlmsg_len = reader.read_u32::()?; + let nlmsg_type = reader.read_u16::()?; + let nlmsg_flags = reader.read_u16::()?; + let nlmsg_seq = reader.read_u32::()?; + let nlmsg_pid = reader.read_u32::()?; + + Ok(NetlinkHeader { + nlmsg_len, + nlmsg_type, + nlmsg_flags, + nlmsg_seq, + nlmsg_pid, + }) + } +} + +impl Display for NetlinkHeader { + fn fmt(&self, f: &mut Formatter<'_>) -> 
std::fmt::Result { + write!( + f, + "NetlinkHeader [ len: {}, type: {}, flags: {}, seq: {}, pid: {} ]", + self.nlmsg_len, self.nlmsg_type, self.nlmsg_flags, self.nlmsg_seq, self.nlmsg_pid + ) + } +} + +#[repr(C)] +#[derive(Debug)] +pub struct NetlinkError { + pub error: i32, + pub msg: NetlinkHeader, + // Other attributes that we're not parsing right now. + pub payload: Vec, +} + +impl Serializable for NetlinkError { + fn to_wire(&self, buf: &mut BytesMut) -> Result<(), std::io::Error> { + buf.writer().write_i32::(self.error)?; + self.msg.to_wire(buf)?; + buf.writer().write_all(&self.payload)?; + Ok(()) + } + fn from_wire(buf: &mut BytesMut) -> Result { + let mut reader = buf.reader(); + let error = reader.read_i32::()?; + let msg = NetlinkHeader::from_wire(buf)?; + let payload: Vec = buf.to_owned().to_vec(); + Ok(NetlinkError { + error, + msg, + payload, + }) + } +} + +#[repr(C)] +#[derive(Debug, Default)] +pub struct RouteMessage { + // address family + pub af: u8, + pub dst_len: u8, + pub src_len: u8, + pub tos: u8, + pub table: u8, + pub protocol: u8, + pub scope: u8, + pub r#type: u8, + pub flags: u32, +} + +impl Serializable for RouteMessage { + fn to_wire(&self, buf: &mut BytesMut) -> Result<(), std::io::Error> { + let mut writer = buf.writer(); + writer.write_u8(self.af)?; + writer.write_u8(self.dst_len)?; + writer.write_u8(self.src_len)?; + writer.write_u8(self.tos)?; + writer.write_u8(self.table)?; + writer.write_u8(self.protocol)?; + writer.write_u8(self.scope)?; + writer.write_u8(self.r#type)?; + writer.write_u32::(self.flags)?; + Ok(()) + } + fn from_wire(buf: &mut BytesMut) -> Result { + // Check that the length is at least the size of a RouteMessage + if buf.len() < std::mem::size_of::() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Buffer not large enough to read RouteMessage".to_string(), + )); + } + + let mut reader = buf.reader(); + let af = reader.read_u8()?; + let dst_len = reader.read_u8()?; + let src_len = 
reader.read_u8()?; + let tos = reader.read_u8()?; + let table = reader.read_u8()?; + let protocol = reader.read_u8()?; + let scope = reader.read_u8()?; + let r#type = reader.read_u8()?; + let flags = reader.read_u32::()?; + + Ok(RouteMessage { + af, + dst_len, + src_len, + tos, + table, + protocol, + scope, + r#type, + flags, + }) + } +} + +#[derive(Debug, Eq, PartialEq)] +pub enum RouteAttribute { + Dst(Vec), + Src(Vec), + Iif(u32), + Oif(u32), + Gateway(Vec), + Priority(u32), + Prefsrc(u32), + Metrics(u32), + // TODO: support multipath attribute properly + Multipath(Vec), + Flow(u32), + // TODO: support cacheinfo properly + CacheInfo(Vec), + Table(u32), + Mark(u32), + // TODO: support mfc_stats properly + MfcStats(Vec), + // TODO: support via properly + Via(Vec), + NewDst(Vec), + Pref(u8), + EnacpType(u16), + Encap(Vec), +} + +impl NetlinkAttribute for RouteAttribute { + fn attr_type(&self) -> u16 { + match self { + RouteAttribute::Dst(_) => constants::RTA_DST, + RouteAttribute::Src(_) => constants::RTA_SRC, + RouteAttribute::Iif(_) => constants::RTA_IIF, + RouteAttribute::Oif(_) => constants::RTA_OIF, + RouteAttribute::Gateway(_) => constants::RTA_GATEWAY, + RouteAttribute::Priority(_) => constants::RTA_PRIORITY, + RouteAttribute::Prefsrc(_) => constants::RTA_PREFSRC, + RouteAttribute::Metrics(_) => constants::RTA_METRICS, + RouteAttribute::Multipath(_) => constants::RTA_MULTIPATH, + RouteAttribute::Flow(_) => constants::RTA_FLOW, + RouteAttribute::CacheInfo(_) => constants::RTA_CACHEINFO, + RouteAttribute::Table(_) => constants::RTA_TABLE, + RouteAttribute::Mark(_) => constants::RTA_MARK, + RouteAttribute::MfcStats(_) => constants::RTA_MFC_STATS, + RouteAttribute::Via(_) => constants::RTA_VIA, + RouteAttribute::NewDst(_) => constants::RTA_NEWDST, + RouteAttribute::Pref(_) => constants::RTA_PREF, + RouteAttribute::EnacpType(_) => constants::RTA_ENCAP_TYPE, + RouteAttribute::Encap(_) => constants::RTA_ENCAP, + } + } + fn payload_len(&self) -> u16 { + match self { 
+ RouteAttribute::Dst(dst) => dst.len() as u16, + RouteAttribute::Src(src) => src.len() as u16, + RouteAttribute::Iif(_) => 4, + RouteAttribute::Oif(_) => 4, + RouteAttribute::Gateway(gateway) => gateway.len() as u16, + RouteAttribute::Priority(_) => 4, + RouteAttribute::Prefsrc(_) => 4, + RouteAttribute::Metrics(_) => 4, + RouteAttribute::Multipath(multipath) => multipath.len() as u16, + RouteAttribute::Flow(_) => 4, + RouteAttribute::CacheInfo(cacheinfo) => cacheinfo.len() as u16, + RouteAttribute::Table(_) => 4, + RouteAttribute::Mark(_) => 4, + RouteAttribute::MfcStats(stats) => stats.len() as u16, + RouteAttribute::Via(via) => via.len() as u16, + RouteAttribute::NewDst(newdst) => newdst.len() as u16, + RouteAttribute::Pref(_) => 1, + RouteAttribute::EnacpType(_) => 2, + RouteAttribute::Encap(encap) => encap.len() as u16, + } + } + fn write_payload(&self, buf: &mut BytesMut) -> Result<(), std::io::Error> { + let mut writer = buf.writer(); + match self { + RouteAttribute::Dst(dst) => buf.put(dst.as_slice()), + RouteAttribute::Src(src) => buf.put(src.as_slice()), + RouteAttribute::Iif(iif) => writer.write_u32::(*iif)?, + RouteAttribute::Oif(oif) => writer.write_u32::(*oif)?, + RouteAttribute::Gateway(gateway) => buf.put(gateway.as_slice()), + RouteAttribute::Priority(priority) => writer.write_u32::(*priority)?, + RouteAttribute::Prefsrc(prefsrc) => writer.write_u32::(*prefsrc)?, + RouteAttribute::Metrics(metrics) => writer.write_u32::(*metrics)?, + RouteAttribute::Multipath(multipath) => buf.put(multipath.as_slice()), + RouteAttribute::Flow(flow) => writer.write_u32::(*flow)?, + RouteAttribute::CacheInfo(cacheinfo) => buf.put(cacheinfo.as_slice()), + RouteAttribute::Table(table) => writer.write_u32::(*table)?, + RouteAttribute::Mark(mark) => writer.write_u32::(*mark)?, + RouteAttribute::MfcStats(stats) => buf.put(stats.as_slice()), + RouteAttribute::Via(via) => buf.put(via.as_slice()), + RouteAttribute::NewDst(newdst) => buf.put(newdst.as_slice()), + 
RouteAttribute::Pref(pref) => buf.put_u8(*pref), + RouteAttribute::EnacpType(encaptype) => writer.write_u16::<NativeEndian>(*encaptype)?, + RouteAttribute::Encap(encap) => buf.put(encap.as_slice()), + }; + Ok(()) + } +} + +// Wire form of a route attribute: a native-endian u16 length (header + payload, +// padding excluded), a native-endian u16 type, the payload, then zero padding up to +// the next 4 byte boundary. +impl Serializable for RouteAttribute { + /// Appends this attribute to `buf`, including the trailing alignment padding. + /// + /// Returns `InvalidInput` when the payload is too long for the u16 length field. + fn to_wire(&self, buf: &mut BytesMut) -> Result<(), std::io::Error> { + // Write Length, Type, Value then pad to 4 byte boundary. + let payload_len = self.payload_len(); + let attr_len = payload_len.checked_add(4).ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "route attr payload too large for u16 length", + ) + })?; + let mut writer = buf.writer(); + writer.write_u16::<NativeEndian>(attr_len)?; + writer.write_u16::<NativeEndian>(self.attr_type())?; + self.write_payload(buf)?; + + // Align the attribute to a four byte boundary. The 4 byte header is already + // aligned, so only the payload length decides how many zero bytes are needed. + let padding = usize::from((4 - payload_len % 4) % 4); + buf.put(&[0u8; 3][..padding]); + + Ok(()) + } + /// Reads one attribute from the front of `buf` and skips its alignment padding. + /// + /// Returns `InvalidData` for a length below 4, a payload longer than the bytes + /// left in `buf`, a fixed-size attribute with the wrong payload length, or an + /// unknown attribute type. + fn from_wire(buf: &mut BytesMut) -> Result<Self, std::io::Error> { + let mut reader = buf.reader(); + let attr_len: u16 = reader.read_u16::<NativeEndian>()?; + let attr_type: u16 = reader.read_u16::<NativeEndian>()?; + + if attr_len < 4 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "route attr cannot have length < 4", + )); + } + let payload_len = attr_len - 4; + // attr_len excludes padding; this is the distance to the next 4 byte boundary. + let padding = usize::from((4 - attr_len % 4) % 4); + if buf.remaining() < payload_len.into() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Route attribute length was {} but buf has {} remaining", + payload_len, + buf.remaining() + ), + )); + } + let mut payload: Vec<u8> = vec![0u8; payload_len.into()]; + let bytes_read = buf.reader().read(&mut payload)?; + if bytes_read != payload_len.into() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Failed to read {} bytes of payload, instead got {}", + payload_len, bytes_read + ), + )); + } + + // Move buf past padding bytes. advance() panics when asked to skip more than + // remains, so clamp for a final attribute that has no trailing padding. 
+ let skip = padding.min(buf.remaining()); + buf.advance(skip); + + match attr_type { + constants::RTA_DST => Ok(RouteAttribute::Dst(payload)), + constants::RTA_SRC => Ok(RouteAttribute::Src(payload)), + constants::RTA_IIF => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Iif(NativeEndian::read_u32(&payload))) + } + constants::RTA_OIF => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Oif(NativeEndian::read_u32(&payload))) + } + constants::RTA_GATEWAY => Ok(RouteAttribute::Gateway(payload)), + constants::RTA_PRIORITY => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Priority(NativeEndian::read_u32(&payload))) + } + constants::RTA_PREFSRC => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Prefsrc(NativeEndian::read_u32(&payload))) + } + constants::RTA_METRICS => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Metrics(NativeEndian::read_u32(&payload))) + } + constants::RTA_MULTIPATH => Ok(RouteAttribute::Multipath(payload)), + constants::RTA_FLOW => { + check_vec_len!(payload, 4); + // Decode from the copied payload; buf has already moved past this attribute. + Ok(RouteAttribute::Flow(NativeEndian::read_u32(&payload))) + } + constants::RTA_CACHEINFO => Ok(RouteAttribute::CacheInfo(payload)), + constants::RTA_TABLE => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Table(NativeEndian::read_u32(&payload))) + } + constants::RTA_MARK => { + check_vec_len!(payload, 4); + Ok(RouteAttribute::Mark(NativeEndian::read_u32(&payload))) + } + constants::RTA_MFC_STATS => Ok(RouteAttribute::MfcStats(payload)), + constants::RTA_VIA => Ok(RouteAttribute::Via(payload)), + constants::RTA_NEWDST => Ok(RouteAttribute::NewDst(payload)), + constants::RTA_PREF => { + check_vec_len!(payload, 1); + Ok(RouteAttribute::Pref(payload[0])) + } + constants::RTA_ENCAP_TYPE => { + check_vec_len!(payload, 2); + Ok(RouteAttribute::EnacpType(NativeEndian::read_u16(&payload))) + } + constants::RTA_ENCAP => Ok(RouteAttribute::Encap(payload)), + _ => Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unknown attribute type: {}", attr_type), + )), + 
} + } +} + +#[cfg(test)] +mod tests { + use super::RouteAttribute; + use crate::packet::parse_netlink_message; + use crate::traits::Serializable; + use bytes::BytesMut; + + #[test] + fn routemessage_roundtrip() { + let _payload = &[ + 0x74, 0x00, 0x00, 0x00, 0x18, 0x00, 0x02, 0x00, 0x35, 0x86, 0x00, 0x00, 0x31, 0x2f, + 0x05, 0x00, 0x0a, 0x80, 0x00, 0x00, 0xfe, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x0f, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x14, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x08, 0x00, 0x06, 0x00, 0x00, 0x01, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x24, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x14, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + } + + #[test] + fn rta_table() { + let payload: &[u8] = &[0x08, 0x00, 0x0f, 0x00, 0xff, 0x00, 0x00, 0x00]; + let attr = RouteAttribute::from_wire(&mut BytesMut::from(payload)); + assert_eq!(RouteAttribute::Table(0xff), attr.unwrap()); + } + + #[test] + fn rta_dst() { + let payload: &[u8] = &[ + 0x14, 0x00, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + let attr = RouteAttribute::from_wire(&mut BytesMut::from(payload)); + assert_eq!( + RouteAttribute::Dst(vec![255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,]), + attr.unwrap() + ); + } + + #[test] + fn parse_netlink_example_error() { + let payload_str = "58000000020000000319000022311300edffffff410000001800050003190000000000000a280000000000000000000009000100200116b8170014000500200108e009ff2000000000000000000208000f00c90000000000"; + let payload = hex::decode(payload_str).expect("Test data hex decode failed"); + let mut buf = BytesMut::from(payload.as_slice()); + let res = parse_netlink_message(&mut 
buf).unwrap(); + println!("Parsed netlink message: {:?}", res); + // assert_eq!(initial_capacity, buf.capacity()); + } + + // TODO: Clean this up to test only rtnetlink messages. + // This blob contains link add meessages. + // #[test] + // fn parse_netlink_message_invariants() { + // let payload: &[u8] = &[ + // /* Netlink route*/ 0xf4, 0x03, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x65, 0xbf, + // 0xe2, 0x61, 0xe8, 0x48, 0x85, 0xaa, 0x00, 0x00, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, + // 0x43, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x77, 0x6c, + // 0x70, 0x30, 0x73, 0x32, 0x30, 0x66, 0x33, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0d, 0x00, + // 0xe8, 0x03, 0x00, 0x00, 0x05, 0x00, 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, + // 0x11, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x8c, 0x05, 0x00, 0x00, + // 0x08, 0x00, 0x32, 0x00, 0x00, 0x01, 0x00, 0x00, 0x08, 0x00, 0x33, 0x00, 0x00, 0x09, + // 0x00, 0x00, 0x08, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x1e, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x1f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, + // 0x28, 0x00, 0xff, 0xff, 0x00, 0x00, 0x08, 0x00, 0x29, 0x00, 0x00, 0x00, 0x01, 0x00, + // 0x08, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x21, 0x00, 0x01, 0x00, + // 0x00, 0x00, 0x0c, 0x00, 0x06, 0x00, 0x6e, 0x6f, 0x71, 0x75, 0x65, 0x75, 0x65, 0x00, + // 0x08, 0x00, 0x23, 0x00, 0x5e, 0x00, 0x00, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x2f, 0x00, + // 0x00, 0x00, 0x08, 0x00, 0x30, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x05, 0x00, 0x27, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, + // 0x01, 0x00, 0x1c, 0x99, 0x57, 0xd9, 0x60, 0xa2, 0x00, 0x00, 0x0a, 0x00, 0x02, 0x00, + // 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0xc4, 0x00, 0x17, 0x00, 0x9a, 0x2d, + 
// 0xbd, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0x62, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0xb6, 0x31, 0x63, 0x29, 0x03, 0x00, 0x00, 0x00, 0x15, 0xff, 0x5f, 0x7e, 0x01, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x07, 0x00, 0x9a, 0x2d, + // 0xbd, 0x00, 0xc2, 0x62, 0x53, 0x00, 0xb6, 0x31, 0x63, 0x29, 0x15, 0xff, 0x5f, 0x7e, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x15, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x2b, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x0a, 0x00, 0x36, 0x00, 
0x1c, 0x99, 0x57, 0xd9, 0x60, 0xa2, 0x00, 0x00, + // 0x90, 0x01, 0x1a, 0x00, 0x88, 0x00, 0x02, 0x00, 0x84, 0x00, 0x01, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + // 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x10, 0x27, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x04, 0x01, 0x0a, 0x00, 0x08, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x80, 0x14, 0x00, + // 0x05, 0x00, 0xff, 0xff, 0x00, 0x00, 0xf2, 0xea, 0xf6, 0x00, 0x44, 0x66, 0x00, 0x00, + // 0xe8, 0x03, 0x00, 0x00, 0xe4, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, + // 0x00, 0x00, 0x8c, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + // 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xa0, 0x0f, + // 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3a, 0x09, 0x00, + // 0x80, 0x51, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, 0x58, 0x02, 0x00, 0x00, 0x10, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + // 0x01, 0x00, 0x00, 0x00, 0x60, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x10, 0x27, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 
0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xee, + // 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + // 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x11, 0x00, 0x38, 0x00, 0x30, 0x30, + // 0x30, 0x30, 0x3a, 0x30, 0x30, 0x3a, 0x31, 0x34, 0x2e, 0x33, 0x00, 0x00, 0x00, 0x00, + // 0x08, 0x00, 0x39, 0x00, 0x70, 0x63, 0x69, 0x00, + // ]; + + // let mut buf = BytesMut::from(payload); + // let initial_capacity = buf.capacity(); + // let res = parse_netlink_message(&mut buf); + // println!("Parsed netlink message: {:?}", res); + // assert_eq!(initial_capacity, buf.capacity()); + // } +} diff --git a/netlink/src/traits.rs b/netlink/src/traits.rs new file mode 100644 index 0000000..48c12cc --- /dev/null +++ b/netlink/src/traits.rs @@ -0,0 +1,26 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use bytes::BytesMut; + +pub trait Serializable { + fn to_wire(&self, buf: &mut BytesMut) -> Result<(), std::io::Error>; + fn from_wire(buf: &mut BytesMut) -> Result; +} + +pub trait NetlinkAttribute { + fn attr_type(&self) -> u16; + fn payload_len(&self) -> u16; + fn write_payload(&self, buf: &mut BytesMut) -> Result<(), std::io::Error>; +} diff --git a/tests/integration_tests/Cargo.toml b/tests/integration_tests/Cargo.toml new file mode 100644 index 0000000..97e06fd --- /dev/null +++ b/tests/integration_tests/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "integration_tests" +version = "0.1.0" +authors = ["Rayhaan Jaufeerally "] +edition = "2018" +license = "Apache-2.0" + +[dependencies] +bgpd = { path = "../../bgpd" } +bytes = "1.*" +tokio = { version = "1.6.1", features = ["full"] } +tokio-util = { version = "0.6.7", features = ["codec"] } +tracing = "0.1" +tracing-subscriber = "0.2" +libc = "0.2.126" + +[dev-dependencies] +serial_test = "0.5.1" + +[unstable] +thread_id_value = true \ No newline at end of file diff --git a/tests/integration_tests/rustfmt.toml b/tests/integration_tests/rustfmt.toml new file mode 100644 index 0000000..c51666e --- /dev/null +++ b/tests/integration_tests/rustfmt.toml @@ -0,0 +1 @@ +edition = "2018" \ No newline at end of file diff --git a/tests/integration_tests/tests/basic_startup.rs b/tests/integration_tests/tests/basic_startup.rs new file mode 100644 index 0000000..c42f880 --- /dev/null +++ b/tests/integration_tests/tests/basic_startup.rs @@ -0,0 +1,558 @@ +// Copyright 2021 Rayhaan Jaufeerally. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bgpd::bgp_packet; +use bgpd::bgp_packet::constants::{AddressFamilyIdentifier, SubsequentAddressFamilyIdentifier}; +use bgpd::bgp_packet::messages::BGPSubmessage; +use bgpd::bgp_packet::traits::ParserContext; +use bgpd::server::bgp_server::Server; +use bgpd::server::config::{PeerConfig, ServerConfig}; +use std::io::{Read, Write}; +use std::mem::size_of; +use std::net::Ipv4Addr; +use std::net::Ipv6Addr; +use std::net::TcpListener; +use std::net::TcpStream; +use std::net::{IpAddr, SocketAddrV6}; +use std::os::unix::io::AsRawFd; +use std::time::Duration; +use tokio_util::codec::Decoder; +use tracing::info; + +#[macro_use] +extern crate serial_test; + +fn init() { + match tracing_subscriber::fmt() + .with_env_filter("bgpd=trace,tokio=trace,basic_startup=trace") + .try_init() + { + Ok(()) => {} + Err(e) => { + eprintln!("Failed to setup tracing: {}", e); + } + } +} + +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_bgp_listener_simple() { + init(); + let sc = ServerConfig { + asn: 65535, + hold_time: 10, + identifier: Ipv4Addr::new(127, 0, 0, 1), + grpc_addr: None, + http_addr: None, + listen_addrs: vec!["[::]:9179".to_owned()], + peers: vec![], + }; + + let mut bgp_server = Server::new(sc); + bgp_server.start(true).await.unwrap(); + + // Try to connect to localhost:9179 and it should connect. 
+ assert!(TcpStream::connect("[::1]:9179").is_ok()); + bgp_server.shutdown().await; +} + +// Starts a server with no configured peers, connects to it, writes an OPEN message +// and asserts on the result of a subsequent read. +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_bgp_listener_unknown_peer() { + init(); + let sc = ServerConfig { + asn: 65535, + hold_time: 10, + identifier: Ipv4Addr::new(127, 0, 0, 1), + grpc_addr: None, + http_addr: None, + listen_addrs: vec!["[::]:9179".to_owned()], + peers: vec![], + }; + + let mut bgp_server = Server::new(sc); + bgp_server.start(true).await.unwrap(); + + // Try to connect to localhost:9179 and it should connect. + let conn = TcpStream::connect_timeout(&"[::1]:9179".parse().unwrap(), Duration::from_secs(3)); + assert!(conn.is_ok()); + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x39, 0x01, 0x04, 0x00, 0x2a, 0x00, 0xb4, 0xd4, 0x19, 0x16, 0x26, 0x1c, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x01, 0x00, 0x01, 0x02, 0x02, 0x80, 0x00, 0x02, 0x02, 0x02, 0x00, + 0x02, 0x02, 0x46, 0x00, 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x00, 0x2a, + ]; + + assert!(conn.as_ref().unwrap().write(open_msg_bytes).is_ok()); + + // NOTE(review): Vec::with_capacity(256) has length 0, so read() is handed an empty + // slice and can never report more than 0 bytes; the assertion below therefore does + // not show that the server closed the connection. A zero-filled buffer + // (vec![0u8; 256]) would make the check meaningful; confirm how the server closes + // the connection to an unknown peer before changing it. + let mut buf = Vec::with_capacity(256); + assert_eq!(conn.unwrap().read(&mut buf).unwrap(), 0); + bgp_server.shutdown().await; +} + +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_bgp_listener_known_peer() { + init(); + let v6_addr: Ipv6Addr = "::1".parse().unwrap(); + let sc = ServerConfig { + asn: 65535, + hold_time: 10, + identifier: Ipv4Addr::new(127, 0, 0, 1), + grpc_addr: None, + http_addr: None, + listen_addrs: vec!["[::]:9179".to_owned()], + peers: vec![PeerConfig { + afi: AddressFamilyIdentifier::Ipv6, + safi: SubsequentAddressFamilyIdentifier::Unicast, + asn: 8758, + ip: IpAddr::V6(v6_addr), + announcements: vec![], + name: "local-test-peer".to_string(), + local_pref: 100, + port: None, + }], + }; + + let mut bgp_server = Server::new(sc); + bgp_server.start(true).await.unwrap(); + + // Try to connect to localhost:9179 and it 
should connect. + let mut conn = + TcpStream::connect_timeout(&"[::1]:9179".parse().unwrap(), Duration::from_secs(3)).unwrap(); + + // Make the stream blocking to be able to handle it easily in tests. + conn.set_nonblocking(false).unwrap(); + conn.set_read_timeout(None).unwrap(); + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, 0x18, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x80, 0x00, + 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + + assert!(conn.write_all(open_msg_bytes).is_ok()); + + let mut open_buf = vec![0u8; 65536]; + conn.read(&mut open_buf).unwrap(); + + let mut codec = bgp_packet::messages::Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: None, + }, + }; + + let response_open_msg = codec + .decode(&mut bytes::BytesMut::from(open_buf.as_slice())) + .unwrap(); + + info!("Response message is: {:?}", response_open_msg); + match response_open_msg.unwrap().payload { + BGPSubmessage::OpenMessage(_open) => {} + _ => { + assert!(false); + } + } + + // Check that the server sends a keepalive after the open message. + + let mut ka_buf = vec![0u8; 65536]; + conn.read(&mut ka_buf).unwrap(); + let response_ka_message = codec + .decode(&mut bytes::BytesMut::from(ka_buf.as_slice())) + .unwrap(); + + match response_ka_message.unwrap().payload { + BGPSubmessage::KeepaliveMessage(_ka) => {} + _ => { + assert!(false); + } + } + + bgp_server.shutdown().await; +} + +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_bgp_peer_statemachine_outbound_conn() { + init(); + let v6_addr: Ipv6Addr = "::1".parse().unwrap(); + + // Listen on some arbitrary port and put that port into the config for the server to dial out to. 
+ let listener = TcpListener::bind("[::1]:0".parse::().unwrap()).unwrap(); + info!("Listener is listening on: {:?}", listener.local_addr()); + let port: u16 = listener.local_addr().unwrap().port(); + + let sc = ServerConfig { + asn: 65535, + hold_time: 10, + identifier: Ipv4Addr::new(127, 0, 0, 1), + grpc_addr: None, + http_addr: None, + listen_addrs: vec!["[::]:9179".to_owned()], + peers: vec![PeerConfig { + afi: AddressFamilyIdentifier::Ipv6, + safi: SubsequentAddressFamilyIdentifier::Unicast, + asn: 8758, + ip: IpAddr::V6(v6_addr), + port: Some(port), + announcements: vec![], + name: "local-test-peer".to_string(), + local_pref: 100, + }], + }; + + let mut bgp_server = Server::new(sc); + bgp_server.start(true).await.unwrap(); + + // Wait for the connection from the bgp_server. + info!("Waiting for connection in test"); + let (mut conn, _) = listener.accept().unwrap(); + info!("Got a connection in test"); + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, 0x18, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x80, 0x00, + 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + + assert!(conn.write_all(open_msg_bytes).is_ok()); + + let mut open_buf = vec![0u8; 65536]; + conn.read(&mut open_buf).unwrap(); + + let mut codec = bgp_packet::messages::Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: None, + }, + }; + + let response_open_msg = codec + .decode(&mut bytes::BytesMut::from(open_buf.as_slice())) + .unwrap(); + + info!("Response message is: {:?}", response_open_msg); + match response_open_msg.unwrap().payload { + BGPSubmessage::OpenMessage(_open) => {} + _ => { + assert!(false); + } + } + + // Check that the server sends a keepalive after the open message. 
+ + let mut ka_buf = vec![0u8; 65536]; + conn.read(&mut ka_buf).unwrap(); + let response_ka_message = codec + .decode(&mut bytes::BytesMut::from(ka_buf.as_slice())) + .unwrap(); + + match response_ka_message.unwrap().payload { + BGPSubmessage::KeepaliveMessage(_ka) => {} + _ => { + assert!(false); + } + } + + bgp_server.shutdown().await; +} + +#[tokio::test(flavor = "multi_thread")] +#[serial] +// Check that reconnecting to a connection that was previously established works. +async fn test_bgp_peer_statemachine_outbound_reconnection() { + init(); + let v6_addr: Ipv6Addr = "::1".parse().unwrap(); + + // Listen on some arbitrary port and put that port into the config for the server to dial out to. + let listener = TcpListener::bind("[::1]:0".parse::().unwrap()).unwrap(); + info!("Listener is listening on: {:?}", listener.local_addr()); + let port: u16 = listener.local_addr().unwrap().port(); + + let sc = ServerConfig { + asn: 65535, + hold_time: 10, + identifier: Ipv4Addr::new(127, 0, 0, 1), + grpc_addr: None, + http_addr: None, + listen_addrs: vec!["[::]:9179".to_owned()], + peers: vec![PeerConfig { + afi: AddressFamilyIdentifier::Ipv6, + safi: SubsequentAddressFamilyIdentifier::Unicast, + asn: 8758, + ip: IpAddr::V6(v6_addr), + port: Some(port), + announcements: vec![], + name: "local-test-peer".to_string(), + local_pref: 100, + }], + }; + + let mut bgp_server = Server::new(sc); + bgp_server.start(true).await.unwrap(); + + // Wait for the connection from the bgp_server. 
+ info!("Waiting for connection in test"); + let (mut conn, _) = listener.accept().unwrap(); + info!("Got a connection in test"); + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, 0x18, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x80, 0x00, + 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + + assert!(conn.write_all(open_msg_bytes).is_ok()); + + let mut open_buf = vec![0u8; 65536]; + conn.read(&mut open_buf).unwrap(); + + let mut codec = bgp_packet::messages::Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: None, + }, + }; + + let response_open_msg = codec + .decode(&mut bytes::BytesMut::from(open_buf.as_slice())) + .unwrap(); + + info!("Response message is: {:?}", response_open_msg); + match response_open_msg.unwrap().payload { + BGPSubmessage::OpenMessage(_open) => {} + _ => { + assert!(false); + } + } + + // Check that the server sends a keepalive after the open message. + + let mut ka_buf = vec![0u8; 65536]; + conn.read(&mut ka_buf).unwrap(); + let response_ka_message = codec + .decode(&mut bytes::BytesMut::from(ka_buf.as_slice())) + .unwrap(); + + match response_ka_message.unwrap().payload { + BGPSubmessage::KeepaliveMessage(_ka) => {} + _ => { + assert!(false); + } + } + + conn.shutdown(std::net::Shutdown::Both).unwrap(); + + // Expect that the other side reconnects to re-establish the connection. 
+ info!("Waiting for re-connection in test"); + let (mut conn, _) = listener.accept().unwrap(); + info!("Got the re-connection in test"); + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, 0x18, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x80, 0x00, + 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + + assert!(conn.write_all(open_msg_bytes).is_ok()); + + let mut open_buf = vec![0u8; 65536]; + conn.read(&mut open_buf).unwrap(); + + let mut codec = bgp_packet::messages::Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: None, + }, + }; + + let response_open_msg = codec + .decode(&mut bytes::BytesMut::from(open_buf.as_slice())) + .unwrap(); + + info!("Response message is: {:?}", response_open_msg); + match response_open_msg.unwrap().payload { + BGPSubmessage::OpenMessage(_open) => {} + _ => { + assert!(false); + } + } + + // Check that the server sends a keepalive after the open message. 
+ + let mut ka_buf = vec![0u8; 65536]; + conn.read(&mut ka_buf).unwrap(); + let response_ka_message = codec + .decode(&mut bytes::BytesMut::from(ka_buf.as_slice())) + .unwrap(); + + match response_ka_message.unwrap().payload { + BGPSubmessage::KeepaliveMessage(_ka) => {} + _ => { + assert!(false); + } + } + + bgp_server.shutdown().await; +} + +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_bgp_listener_known_peer_inbound_reconnection() { + init(); + let v6_addr: Ipv6Addr = "::1".parse().unwrap(); + let sc = ServerConfig { + asn: 65535, + hold_time: 10, + identifier: Ipv4Addr::new(127, 0, 0, 1), + grpc_addr: None, + http_addr: None, + listen_addrs: vec!["[::]:9179".to_owned()], + peers: vec![PeerConfig { + afi: AddressFamilyIdentifier::Ipv6, + safi: SubsequentAddressFamilyIdentifier::Unicast, + asn: 8758, + ip: IpAddr::V6(v6_addr), + announcements: vec![], + name: "local-test-peer".to_string(), + local_pref: 100, + port: None, + }], + }; + + let mut bgp_server = Server::new(sc); + bgp_server.start(true).await.unwrap(); + + // Try to connect to localhost:9179 and it should connect. + let mut conn = + TcpStream::connect_timeout(&"[::1]:9179".parse().unwrap(), Duration::from_secs(3)).unwrap(); + + // Make the stream blocking to be able to handle it easily in tests. + conn.set_nonblocking(false).unwrap(); + conn.set_read_timeout(None).unwrap(); + + // Unsafe set linger: simulate a broken TCP stream by setting linger with a deadline of 0. + // This causes a RST packet to be sent instead of a FIN, which means that the other side + // will exercise the error path. 
+ unsafe { + let val: libc::linger = libc::linger { + l_onoff: 1, + l_linger: 0, + }; + let ret_val = libc::setsockopt( + conn.as_raw_fd(), + libc::SOL_SOCKET, + libc::SO_LINGER, + &val as *const libc::linger as *const libc::c_void, + size_of::() as libc::socklen_t, + ); + assert!(ret_val == 0); + } + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, 0x18, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x80, 0x00, + 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + + assert!(conn.write_all(open_msg_bytes).is_ok()); + + let mut open_buf = vec![0u8; 65536]; + conn.read(&mut open_buf).unwrap(); + + let mut codec = bgp_packet::messages::Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: None, + }, + }; + + let response_open_msg = codec + .decode(&mut bytes::BytesMut::from(open_buf.as_slice())) + .unwrap(); + + info!("Response message is: {:?}", response_open_msg); + match response_open_msg.unwrap().payload { + BGPSubmessage::OpenMessage(_open) => {} + _ => { + assert!(false); + } + } + + // Check that the server sends a keepalive after the open message. + + let mut ka_buf = vec![0u8; 65536]; + conn.read(&mut ka_buf).unwrap(); + let response_ka_message = codec + .decode(&mut bytes::BytesMut::from(ka_buf.as_slice())) + .unwrap(); + + match response_ka_message.unwrap().payload { + BGPSubmessage::KeepaliveMessage(_ka) => {} + _ => { + assert!(false); + } + } + + // conn.shutdown(std::net::Shutdown::Both).unwrap(); + drop(conn); + + // Try to connect to localhost:9179 and it should connect and send the OPEN message. 
+ let mut conn = + TcpStream::connect_timeout(&"[::1]:9179".parse().unwrap(), Duration::from_secs(3)).unwrap(); + + let open_msg_bytes: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0x35, 0x01, 0x04, 0x22, 0x36, 0x00, 0xb4, 0xd4, 0x19, 0x1b, 0x2d, 0x18, 0x02, + 0x06, 0x01, 0x04, 0x00, 0x02, 0x00, 0x01, 0x02, 0x02, 0x02, 0x00, 0x02, 0x02, 0x80, 0x00, + 0x02, 0x06, 0x41, 0x04, 0x00, 0x00, 0x22, 0x36, + ]; + + assert!(conn.write_all(open_msg_bytes).is_ok()); + + let mut open_buf = vec![0u8; 65536]; + conn.set_read_timeout(Some(Duration::from_secs(3))).unwrap(); + conn.read(&mut open_buf).unwrap(); + + let mut codec = bgp_packet::messages::Codec { + ctx: ParserContext { + four_octet_asn: None, + nlri_mode: None, + }, + }; + + let response_open_msg = codec + .decode(&mut bytes::BytesMut::from(open_buf.as_slice())) + .unwrap(); + + info!("Response message is: {:?}", response_open_msg); + match response_open_msg.unwrap().payload { + BGPSubmessage::OpenMessage(_open) => {} + _ => { + assert!(false); + } + } + + info!("Reconnection successful"); + + bgp_server.shutdown().await; +}