does not go down when Amazon's North Virginia DC goes down
VLC handles most audio, video and streaming formats
fuzzing
and the respective input-handling routines as a recognizer for that language
ex: ROP gadgets
weird machine
the grammar can be context-free, but the behaviour (state machine) heavily context sensitive
Be conservative in what you do, be liberal in what you accept from others
yet...
those choices may be wrong or not, but they're ambiguous
length-value is context sensitive
Length-value is annoying, but essential for partial parsing
token delimited means you could buffer forever
Fixed size would be cool -> regular grammar
formats already existing
parsing manually VS generated code
parser combinators: middle ground
emphasis on safety, zero cost abstractions
fix bugclasses, not bugs
No garbage collection
pub struct Slice<T> {
pub data: *const T,
pub len: usize,
}
let vec = vec![1, 2, 3];
let int_slice = &vec[1..];
a bet that we can make zero copy parsers in Rust
named!(parens<i64>, delimited!(
delimited!(
opt!(multispace),
char!("("),
opt!(multispace)
),
expr,
delimited!(
opt!(multispace),
char!(")"),
opt!(multispace)
)
)
);
pub enum IResult<Input,Output,CustomError=u32> {
Done(Input,Output),
Error(Err<Input,CustomError>),
Incomplete(Needed)
}
IResult<&[u8], &[u8]>
IResult<&[u8], &str>
IResult<&str, MyStruct>
named!(rm< &str,Vec<&str> >,
re_capture!(r"([:alpha:]+)\s+((\d+).(\d+).(\d+))")
);
rm("nom 0.3.11 hello");
// => Done(" hello", vec!["nom 0.3.11",
// "nom", "0.3.11", "0", "3", "11"]));
let input = vec![0b10101010, 0b11110000, 0b00110011];
tag_bits!( (&input[..], 0), u8, 3, 0b101)
// => Done((&sl[0..], 3), 5)
named!(http_version, chain!(
tag!("HTTP/") ~
version: take_while1!(is_version),
|| version));
small.mp4 (375 kB) | bigbuckbunny.mp4 (5.3 MB) | |
---|---|---|
hammer | 32424 ns/iter | 26523 ns/iter |
attoparsec | 1138 ns/iter (+/- 55.2) | 1124 ns/iter (+/- 62.3) |
cereal | 189 ns/iter (+/- 9.9) | 193 ns/iter (+/- 12.4) |
nom | 240 ns/iter (+/- 56) | 195 ns/iter (+/- 69) |
21kB | 104kB | |
---|---|---|
manual C | 62,451 ns/iter (+/- 1000 ns) | 300,000 ns/iter (+/- 16 ns) |
nom | 48,420 ns/iter (+/- 2,662 ns) | 250,547 ns/iter (+/- 6,967 ns) |
attoparsec | 241.5 μs/iter (+/-5.7 μs) | 1.836 ms/iter (+/- 137 μs) |
It is alright that nom gets more complex if building stuff with nom gets easier
pub enum IResult<Input,Output,CustomError=u32> {
Done(Input,Output),
Error(Err<Input,CustomError>),
Incomplete(Needed)
}
pub enum Needed {
Unknown,
Size(usize)
}
named!(take_5, take(5));
let res = take_5(&"abcd"[..]);
// => Incomplete(Needed::Size(5))
pub enum ConsumerState<O,E=(),M=()> {
Done(O),
Error(E),
Continue(M)
}
[...]
let mut m = MemProducer::new(&b"abcdefghijklabcdabcd"[..], 8);
let mut s1 = StateConsumer {
state: Continue(Consume(0)),
parsing: Initial
};
let mut s2 = StrConsumer { state: Continue(Consume(0)) };
let mut a = ChainConsumer::new(&mut s1, &mut s2);
let res = m.apply(&mut a));
actually, would be more interesting to handle errors at runtime
slice.to_hex(8);
ftyp header:
00000000 6d 70 34 32 00 00 00 00 mp42....
00000008 6d 70 34 32 69 73 6f 6d mp42isom
00000010 61 76 63 31 avc1
named!(f, dbg_dmp!( tag!( "abcd" ) ) );
let a = &b"efghijkl"[..];
f(a);
// -> Error(Position(Tag,
// [101, 102, 103, 104, 105, 106, 107, 108]))
// at l.1 by ' tag ! ( "abcd" ) '
// -> 00000000 65 66 67 68 69 6a 6b 6c efghijkl
pub enum IResult<Input,Output,CustomError=u32> {
Done(Input,Output),
Error(Err<Input,CustomError>),
Incomplete(Needed)
}
pub enum Err<Input,CustomError=u32>{
Code( ErrorKind<CustomError> ),
Position( ErrorKind<CustomError>, Input ),
Node( ErrorKind<CustomError>,
Box<Err<Input,CustomError>> ),
NodePosition( ErrorKind<CustomError>, Input,
Box<Err<Input,CustomError>> )
}
named!(err_test,
preceded!(tag!("efgh"), error!(42,
chain!(
tag!("ijkl") ~
res: error!(128, tag!("mnop")) ,
|| { res }
)
)
)
);
let err = err_test( &b"efghblah"[..] );
// -> Error( NodePosition(Custom(42),
// &b"efghblah"[..],
// Box::new(Position(Tag,
// &b"blah"[..]))
// ))
named!(err_test,
preceded!(tag!("efgh"), error!(42,
chain!(
tag!("ijkl") ~
res: error!(128, tag!("mnop")) ,
|| { res }
)
)
)
);
let err = err_test( &b"efghijklblah"[..]);
// -> Error(
// NodePosition(Custom(42),
// &b"ijklblah"[..],
// Box::new(NodePosition(Custom(128),
// &b"blah"[..],
// Box::new(Position(Tag,
// &b"blah"[..]))
// ))))
what can we do with that?
use nom::util::error_to_list;
fn error_to_string(e: Err) -> &str {
let v:Vec<u32> = error_to_list(e);
match &v[..] {
[Custom(42), Tag] => "missing `ijkl` tag",
[Custom(42),
Custom(128), Tag] => "missing `mnop` tag after `ijkl`",
_ => "unrecognized error"
}
}
let mut err_map = HashMap::new();
add_error_pattern(
&mut err_map,
// generate error from known bad input
parse(&b"efghaaaa"[..]),
"missing `ijkl` tag"
);
if let Error(e) = parse(&b"efghblah"[..]) {
let msg = err_map.get(&error_to_list(e));
// -> "missing `ijkl` tag"
};
if let Error(e) = parse(&b"efghijklblah"[..]) {
let msg = err_map.get(&error_to_list(e))
// -> "missing `mnop` tag after `ijkl`"
};
idea: generate error patterns from known bad inputs
named!(err_test, alt!(
tag!("abcd") |
error!(12, preceded!(tag!("efgh"),
error!(42, chain!(
tag!("ijk") ~
res: error!(128, tag!("mnop")) ,
|| { res }
))
))
));
dropbox started using months ago, when it was not very stable