Mercurial Hosting > luan
comparison src/org/eclipse/jetty/util/Utf8Appendable.java @ 802:3428c60d7cfc
replace jetty jars with source
| author | Franklin Schmidt <fschmidt@gmail.com> |
|---|---|
| date | Wed, 07 Sep 2016 21:15:48 -0600 |
| parents | |
| children | 8e9db0bbf4f9 |
comparison
equal
deleted
inserted
replaced
| 801:6a21393191c1 | 802:3428c60d7cfc |
|---|---|
| 1 // | |
| 2 // ======================================================================== | |
| 3 // Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd. | |
| 4 // ------------------------------------------------------------------------ | |
| 5 // All rights reserved. This program and the accompanying materials | |
| 6 // are made available under the terms of the Eclipse Public License v1.0 | |
| 7 // and Apache License v2.0 which accompanies this distribution. | |
| 8 // | |
| 9 // The Eclipse Public License is available at | |
| 10 // http://www.eclipse.org/legal/epl-v10.html | |
| 11 // | |
| 12 // The Apache License v2.0 is available at | |
| 13 // http://www.opensource.org/licenses/apache2.0.php | |
| 14 // | |
| 15 // You may elect to redistribute this code under either of these licenses. | |
| 16 // ======================================================================== | |
| 17 // | |
| 18 | |
| 19 package org.eclipse.jetty.util; | |
| 20 | |
| 21 import java.io.IOException; | |
| 22 | |
| 23 import org.eclipse.jetty.util.log.Log; | |
| 24 import org.eclipse.jetty.util.log.Logger; | |
| 25 | |
| 26 /* ------------------------------------------------------------ */ | |
| 27 /** | |
| 28 * Utf8 Appendable abstract base class | |
| 29 * | |
| 30 * This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, which are converted into characters. | |
| 31 * | |
| 32 * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before a character is appended to the string buffer. | |
| 33 * | |
| 34 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by | |
| 35 * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ | |
| 36 * | |
| 37 * License information for Bjoern Hoehrmann's code: | |
| 38 * | |
| 39 * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> | |
| 40 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal | |
| 41 * in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 42 * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: | |
| 43 * | |
| 44 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. | |
| 45 * | |
| 46 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | |
| 47 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER | |
| 48 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| 49 **/ | |
| 50 public abstract class Utf8Appendable | |
| 51 { | |
| 52 protected static final Logger LOG = Log.getLogger(Utf8Appendable.class); | |
| 53 public static final char REPLACEMENT = '\ufffd'; | |
| 54 private static final int UTF8_ACCEPT = 0; | |
| 55 private static final int UTF8_REJECT = 12; | |
| 56 | |
| 57 protected final Appendable _appendable; | |
| 58 protected int _state = UTF8_ACCEPT; | |
| 59 | |
| 60 private static final byte[] BYTE_TABLE = | |
| 61 { | |
| 62 // The first part of the table maps bytes to character classes, which are | |
| 63 // used to reduce the size of the transition table and to create bitmasks. | |
| 64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
| 65 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
| 66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
| 67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | |
| 68 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, | |
| 69 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | |
| 70 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, | |
| 71 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8 | |
| 72 }; | |
| 73 | |
| 74 private static final byte[] TRANS_TABLE = | |
| 75 { | |
| 76 // The second part is a transition table that maps a combination | |
| 77 // of a state of the automaton and a character class to a state. | |
| 78 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, | |
| 79 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, | |
| 80 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, | |
| 81 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, | |
| 82 12,36,12,12,12,12,12,12,12,12,12,12 | |
| 83 }; | |
| 84 | |
| 85 private int _codep; | |
| 86 | |
| 87 public Utf8Appendable(Appendable appendable) | |
| 88 { | |
| 89 _appendable = appendable; | |
| 90 } | |
| 91 | |
| 92 public abstract int length(); | |
| 93 | |
| 94 protected void reset() | |
| 95 { | |
| 96 _state = UTF8_ACCEPT; | |
| 97 } | |
| 98 | |
| 99 public void append(byte b) | |
| 100 { | |
| 101 try | |
| 102 { | |
| 103 appendByte(b); | |
| 104 } | |
| 105 catch (IOException e) | |
| 106 { | |
| 107 throw new RuntimeException(e); | |
| 108 } | |
| 109 } | |
| 110 | |
| 111 public void append(byte[] b, int offset, int length) | |
| 112 { | |
| 113 try | |
| 114 { | |
| 115 int end = offset + length; | |
| 116 for (int i = offset; i < end; i++) | |
| 117 appendByte(b[i]); | |
| 118 } | |
| 119 catch (IOException e) | |
| 120 { | |
| 121 throw new RuntimeException(e); | |
| 122 } | |
| 123 } | |
| 124 | |
| 125 public boolean append(byte[] b, int offset, int length, int maxChars) | |
| 126 { | |
| 127 try | |
| 128 { | |
| 129 int end = offset + length; | |
| 130 for (int i = offset; i < end; i++) | |
| 131 { | |
| 132 if (length() > maxChars) | |
| 133 return false; | |
| 134 appendByte(b[i]); | |
| 135 } | |
| 136 return true; | |
| 137 } | |
| 138 catch (IOException e) | |
| 139 { | |
| 140 throw new RuntimeException(e); | |
| 141 } | |
| 142 } | |
| 143 | |
| 144 protected void appendByte(byte b) throws IOException | |
| 145 { | |
| 146 | |
| 147 if (b > 0 && _state == UTF8_ACCEPT) | |
| 148 { | |
| 149 _appendable.append((char)(b & 0xFF)); | |
| 150 } | |
| 151 else | |
| 152 { | |
| 153 int i = b & 0xFF; | |
| 154 int type = BYTE_TABLE[i]; | |
| 155 _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6); | |
| 156 int next = TRANS_TABLE[_state + type]; | |
| 157 | |
| 158 switch(next) | |
| 159 { | |
| 160 case UTF8_ACCEPT: | |
| 161 _state=next; | |
| 162 if (_codep < Character.MIN_HIGH_SURROGATE) | |
| 163 { | |
| 164 _appendable.append((char)_codep); | |
| 165 } | |
| 166 else | |
| 167 { | |
| 168 for (char c : Character.toChars(_codep)) | |
| 169 _appendable.append(c); | |
| 170 } | |
| 171 break; | |
| 172 | |
| 173 case UTF8_REJECT: | |
| 174 String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12); | |
| 175 _codep=0; | |
| 176 _state = UTF8_ACCEPT; | |
| 177 _appendable.append(REPLACEMENT); | |
| 178 throw new NotUtf8Exception(reason); | |
| 179 | |
| 180 default: | |
| 181 _state=next; | |
| 182 | |
| 183 } | |
| 184 } | |
| 185 } | |
| 186 | |
| 187 public boolean isUtf8SequenceComplete() | |
| 188 { | |
| 189 return _state == UTF8_ACCEPT; | |
| 190 } | |
| 191 | |
| 192 public static class NotUtf8Exception extends IllegalArgumentException | |
| 193 { | |
| 194 public NotUtf8Exception(String reason) | |
| 195 { | |
| 196 super("Not valid UTF8! "+reason); | |
| 197 } | |
| 198 } | |
| 199 | |
| 200 protected void checkState() | |
| 201 { | |
| 202 if (!isUtf8SequenceComplete()) | |
| 203 { | |
| 204 _codep=0; | |
| 205 _state = UTF8_ACCEPT; | |
| 206 try | |
| 207 { | |
| 208 _appendable.append(REPLACEMENT); | |
| 209 } | |
| 210 catch(IOException e) | |
| 211 { | |
| 212 throw new RuntimeException(e); | |
| 213 } | |
| 214 throw new NotUtf8Exception("incomplete UTF8 sequence"); | |
| 215 } | |
| 216 } | |
| 217 | |
| 218 public String toReplacedString() | |
| 219 { | |
| 220 if (!isUtf8SequenceComplete()) | |
| 221 { | |
| 222 _codep=0; | |
| 223 _state = UTF8_ACCEPT; | |
| 224 try | |
| 225 { | |
| 226 _appendable.append(REPLACEMENT); | |
| 227 } | |
| 228 catch(IOException e) | |
| 229 { | |
| 230 throw new RuntimeException(e); | |
| 231 } | |
| 232 Throwable th= new NotUtf8Exception("incomplete UTF8 sequence"); | |
| 233 LOG.warn(th.toString()); | |
| 234 LOG.debug(th); | |
| 235 } | |
| 236 return _appendable.toString(); | |
| 237 } | |
| 238 } |
