1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 //! Determine displayed width of `char` and `str` types according to
12 //! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
13 //! rules.
14 //!
15 //! ```rust
16 //! extern crate unicode_width;
17 //!
18 //! use unicode_width::UnicodeWidthStr;
19 //!
20 //! fn main() {
21 //!     let teststr = "Hello, world!";
22 //!     let width = UnicodeWidthStr::width(teststr);
23 //!     println!("{}", teststr);
24 //!     println!("The above string is {} columns wide.", width);
25 //!     let width = teststr.width_cjk();
26 //!     println!("The above string is {} columns wide (CJK).", width);
27 //! }
28 //! ```
29 //!
30 //! # features
31 //!
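//! unicode-width supports a `no_std` feature, which removes the dependence
//! on std and uses the equivalent functions from core instead. Note that
//! this copy of the crate always links `std` (see the unconditional
//! `extern crate std` declaration in this file).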
34 //!
35 //! # crates.io
36 //!
37 //! You can use this package in your project by adding the following
38 //! to your `Cargo.toml`:
39 //!
40 //! ```toml
41 //! [dependencies]
42 //! unicode-width = "0.1.5"
43 //! ```
44 
45 #![deny(missing_docs, unsafe_code)]
46 #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
47        html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
48 
49 #![cfg_attr(feature = "bench", feature(test))]
50 #![no_std]
51 
52 // ANDROID: Unconditionally use std to allow building as a dylib.
53 #[macro_use]
54 extern crate std;
55 
56 #[cfg(feature = "bench")]
57 extern crate test;
58 
59 use tables::charwidth as cw;
60 pub use tables::UNICODE_VERSION;
61 
62 mod tables;
63 
64 #[cfg(test)]
65 mod tests;
66 
/// Display-width queries for a single Unicode character.
///
/// The two methods differ only in how they count characters that
/// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
/// places in the Ambiguous width category.
pub trait UnicodeWidthChar {
    /// Reports how many columns the character occupies when displayed.
    ///
    /// Yields `None` for control characters, with the exception of `'\x00'`.
    ///
    /// Characters in the Ambiguous category of
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// are counted as 1 column wide. That matches the recommendation for
    /// non-CJK contexts, and for cases where the context cannot be reliably
    /// determined.
    fn width(self) -> Option<usize>;

    /// Reports how many columns the character occupies when displayed in a
    /// CJK context.
    ///
    /// Yields `None` for control characters, with the exception of `'\x00'`.
    ///
    /// Characters in the Ambiguous category of
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// are counted as 2 columns wide. That matches the recommendation for
    /// CJK contexts.
    fn width_cjk(self) -> Option<usize>;
}
87 
88 impl UnicodeWidthChar for char {
89     #[inline]
width(self) -> Option<usize>90     fn width(self) -> Option<usize> { cw::width(self, false) }
91 
92     #[inline]
width_cjk(self) -> Option<usize>93     fn width_cjk(self) -> Option<usize> { cw::width(self, true) }
94 }
95 
/// Methods for determining displayed width of Unicode strings.
pub trait UnicodeWidthStr {
    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 1 column wide. This is consistent with the recommendations for
    /// non-CJK contexts, or when the context cannot be reliably determined.
    // The receiver lifetime is elided: it relates to nothing else in the
    // signature, and implementations may still spell it out if they wish.
    fn width(&self) -> usize;

    /// Returns the string's displayed width in columns.
    ///
    /// Control characters are treated as having zero width.
    ///
    /// This function treats characters in the Ambiguous category according
    /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// as 2 columns wide. This is consistent with the recommendations for
    /// CJK contexts.
    fn width_cjk(&self) -> usize;
}
118 
119 impl UnicodeWidthStr for str {
120     #[inline]
width(&self) -> usize121     fn width(&self) -> usize {
122         self.chars().map(|c| cw::width(c, false).unwrap_or(0)).sum()
123     }
124 
125     #[inline]
width_cjk(&self) -> usize126     fn width_cjk(&self) -> usize {
127         self.chars().map(|c| cw::width(c, true).unwrap_or(0)).sum()
128     }
129 }
130