Mercurial Hosting > luan
comparison src/org/eclipse/jetty/util/Utf8Appendable.java @ 802:3428c60d7cfc
replace jetty jars with source
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Wed, 07 Sep 2016 21:15:48 -0600 |
parents | |
children | 8e9db0bbf4f9 |
comparison
equal
deleted
inserted
replaced
801:6a21393191c1 | 802:3428c60d7cfc |
---|---|
1 // | |
2 // ======================================================================== | |
3 // Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd. | |
4 // ------------------------------------------------------------------------ | |
5 // All rights reserved. This program and the accompanying materials | |
6 // are made available under the terms of the Eclipse Public License v1.0 | |
7 // and Apache License v2.0 which accompanies this distribution. | |
8 // | |
9 // The Eclipse Public License is available at | |
10 // http://www.eclipse.org/legal/epl-v10.html | |
11 // | |
12 // The Apache License v2.0 is available at | |
13 // http://www.opensource.org/licenses/apache2.0.php | |
14 // | |
15 // You may elect to redistribute this code under either of these licenses. | |
16 // ======================================================================== | |
17 // | |
18 | |
19 package org.eclipse.jetty.util; | |
20 | |
21 import java.io.IOException; | |
22 | |
23 import org.eclipse.jetty.util.log.Log; | |
24 import org.eclipse.jetty.util.log.Logger; | |
25 | |
26 /* ------------------------------------------------------------ */ | |
27 /** | |
28 * Utf8 Appendable abstract base class | |
29 * | |
30 * This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, that are converted into characters. | |
31 * | |
32 * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before a character is appended to the string buffer. | |
33 * | |
34 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by | |
35 * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ | |
36 * | |
37 * License information for Bjoern Hoehrmann's code: | |
38 * | |
39 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> | |
40 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal | |
41 * in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
42 * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: | |
43 * | |
44 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. | |
45 * | |
46 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | |
47 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |
48 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
49 **/ | |
50 public abstract class Utf8Appendable | |
51 { | |
52 protected static final Logger LOG = Log.getLogger(Utf8Appendable.class); | |
53 public static final char REPLACEMENT = '\ufffd'; | |
54 private static final int UTF8_ACCEPT = 0; | |
55 private static final int UTF8_REJECT = 12; | |
56 | |
57 protected final Appendable _appendable; | |
58 protected int _state = UTF8_ACCEPT; | |
59 | |
60 private static final byte[] BYTE_TABLE = | |
61 { | |
62 // The first part of the table maps bytes to character classes that | |
63 // to reduce the size of the transition table and create bitmasks. | |
64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
65 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
68 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, | |
69 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | |
70 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
71 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8 | |
72 }; | |
73 | |
74 private static final byte[] TRANS_TABLE = | |
75 { | |
76 // The second part is a transition table that maps a combination | |
77 // of a state of the automaton and a character class to a state. | |
78 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, | |
79 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, | |
80 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, | |
81 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, | |
82 12,36,12,12,12,12,12,12,12,12,12,12 | |
83 }; | |
84 | |
85 private int _codep; | |
86 | |
87 public Utf8Appendable(Appendable appendable) | |
88 { | |
89 _appendable = appendable; | |
90 } | |
91 | |
92 public abstract int length(); | |
93 | |
94 protected void reset() | |
95 { | |
96 _state = UTF8_ACCEPT; | |
97 } | |
98 | |
99 public void append(byte b) | |
100 { | |
101 try | |
102 { | |
103 appendByte(b); | |
104 } | |
105 catch (IOException e) | |
106 { | |
107 throw new RuntimeException(e); | |
108 } | |
109 } | |
110 | |
111 public void append(byte[] b, int offset, int length) | |
112 { | |
113 try | |
114 { | |
115 int end = offset + length; | |
116 for (int i = offset; i < end; i++) | |
117 appendByte(b[i]); | |
118 } | |
119 catch (IOException e) | |
120 { | |
121 throw new RuntimeException(e); | |
122 } | |
123 } | |
124 | |
125 public boolean append(byte[] b, int offset, int length, int maxChars) | |
126 { | |
127 try | |
128 { | |
129 int end = offset + length; | |
130 for (int i = offset; i < end; i++) | |
131 { | |
132 if (length() > maxChars) | |
133 return false; | |
134 appendByte(b[i]); | |
135 } | |
136 return true; | |
137 } | |
138 catch (IOException e) | |
139 { | |
140 throw new RuntimeException(e); | |
141 } | |
142 } | |
143 | |
144 protected void appendByte(byte b) throws IOException | |
145 { | |
146 | |
147 if (b > 0 && _state == UTF8_ACCEPT) | |
148 { | |
149 _appendable.append((char)(b & 0xFF)); | |
150 } | |
151 else | |
152 { | |
153 int i = b & 0xFF; | |
154 int type = BYTE_TABLE[i]; | |
155 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6); | |
156 int next = TRANS_TABLE[_state + type]; | |
157 | |
158 switch(next) | |
159 { | |
160 case UTF8_ACCEPT: | |
161 _state=next; | |
162 if (_codep < Character.MIN_HIGH_SURROGATE) | |
163 { | |
164 _appendable.append((char)_codep); | |
165 } | |
166 else | |
167 { | |
168 for (char c : Character.toChars(_codep)) | |
169 _appendable.append(c); | |
170 } | |
171 break; | |
172 | |
173 case UTF8_REJECT: | |
174 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12); | |
175 _codep=0; | |
176 _state = UTF8_ACCEPT; | |
177 _appendable.append(REPLACEMENT); | |
178 throw new NotUtf8Exception(reason); | |
179 | |
180 default: | |
181 _state=next; | |
182 | |
183 } | |
184 } | |
185 } | |
186 | |
187 public boolean isUtf8SequenceComplete() | |
188 { | |
189 return _state == UTF8_ACCEPT; | |
190 } | |
191 | |
192 public static class NotUtf8Exception extends IllegalArgumentException | |
193 { | |
194 public NotUtf8Exception(String reason) | |
195 { | |
196 super("Not valid UTF8! "+reason); | |
197 } | |
198 } | |
199 | |
200 protected void checkState() | |
201 { | |
202 if (!isUtf8SequenceComplete()) | |
203 { | |
204 _codep=0; | |
205 _state = UTF8_ACCEPT; | |
206 try | |
207 { | |
208 _appendable.append(REPLACEMENT); | |
209 } | |
210 catch(IOException e) | |
211 { | |
212 throw new RuntimeException(e); | |
213 } | |
214 throw new NotUtf8Exception("incomplete UTF8 sequence"); | |
215 } | |
216 } | |
217 | |
218 public String toReplacedString() | |
219 { | |
220 if (!isUtf8SequenceComplete()) | |
221 { | |
222 _codep=0; | |
223 _state = UTF8_ACCEPT; | |
224 try | |
225 { | |
226 _appendable.append(REPLACEMENT); | |
227 } | |
228 catch(IOException e) | |
229 { | |
230 throw new RuntimeException(e); | |
231 } | |
232 Throwable th= new NotUtf8Exception("incomplete UTF8 sequence"); | |
233 LOG.warn(th.toString()); | |
234 LOG.debug(th); | |
235 } | |
236 return _appendable.toString(); | |
237 } | |
238 } |