|
1 package com.isode.stroke.stringcodecs; |
|
2 |
|
3 import java.util.Arrays; |
|
4 |
|
5 /** A very fast and memory efficient class to encode and decode to and from BASE64 in full accordance |
|
6 * with RFC 2045.<br><br> |
|
7 * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 times faster |
|
8 * on small arrays (10 - 1000 bytes) and 2-3 times as fast on larger arrays (10000 - 1000000 bytes) |
|
9 * compared to <code>sun.misc.Encoder()/Decoder()</code>.<br><br> |
|
10 * |
|
11 * On byte arrays the encoder is about 20% faster than Jakarta Commons Base64 Codec for encode and |
|
12 * about 50% faster for decoding large arrays. This implementation is about twice as fast on very small |
|
13 * arrays (< 30 bytes). If source/destination is a <code>String</code> this |
|
14 * version is about three times as fast due to the fact that the Commons Codec result has to be recoded |
|
15 * to a <code>String</code> from <code>byte[]</code>, which is very expensive.<br><br> |
|
16 * |
|
17 * This encode/decode algorithm doesn't create any temporary arrays as many other codecs do, it only |
|
18 * allocates the resulting array. This produces less garbage and it is possible to handle arrays twice |
|
19 * as large as algorithms that create a temporary array. (E.g. Jakarta Commons Codec). It is unknown |
|
20 * whether Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but since performance |
|
21 * is quite low it probably does.<br><br> |
|
22 * |
|
23 * The encoder produces the same output as the Sun one except that the Sun's encoder appends |
|
24 * a trailing line separator if the last character isn't a pad. Unclear why but it only adds to the |
|
25 * length and is probably a side effect. Both are in conformance with RFC 2045 though.<br> |
|
26 * Commons codec seems to always add a trailing line separator.<br><br> |
|
27 * |
|
28 * <b>Note!</b> |
|
29 * The encode/decode method pairs (types) come in three versions with the <b>exact</b> same algorithm and |
|
30 * thus a lot of code redundancy. This is to not create any temporary arrays for transcoding to/from different |
|
31 * format types. The methods not used can simply be commented out.<br><br> |
|
32 * |
|
33 * There is also a "fast" version of all decode methods that works the same way as the normal ones, but |
|
34 * has a few demands on the decoded input. Normally though, these fast versions should be used if the source of |
|
35 * the input is known and it hasn't been tampered with.<br><br> |
|
36 * |
|
37 * If you find the code useful or you find a bug, please send me a note at base64 @ miginfocom . com. |
|
38 * |
|
39 * Licence (BSD): |
|
40 * ============== |
|
41 * |
|
42 * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com) |
|
43 * All rights reserved. |
|
44 * |
|
45 * Redistribution and use in source and binary forms, with or without modification, |
|
46 * are permitted provided that the following conditions are met: |
|
47 * Redistributions of source code must retain the above copyright notice, this list |
|
48 * of conditions and the following disclaimer. |
|
49 * Redistributions in binary form must reproduce the above copyright notice, this |
|
50 * list of conditions and the following disclaimer in the documentation and/or other |
|
51 * materials provided with the distribution. |
|
52 * Neither the name of the MiG InfoCom AB nor the names of its contributors may be |
|
53 * used to endorse or promote products derived from this software without specific |
|
54 * prior written permission. |
|
55 * |
|
56 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
|
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
58 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
|
59 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, |
|
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
|
61 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
|
62 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|
63 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
64 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY |
|
65 * OF SUCH DAMAGE. |
|
66 * |
|
67 * @version 2.2 |
|
68 * @author Mikael Grev |
|
69 * Date: 2004-aug-02 |
|
70 * Time: 11:31:11 |
|
71 */ |
|
72 |
|
public class Base64BSD
{
    /** The 64 characters of the BASE64 alphabet, indexed by their 6-bit value. */
    private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /**
     * Reverse lookup table for code points 0-255: the 6-bit value of a BASE64 character,
     * 0 for the pad character '=' and -1 for everything else.
     */
    private static final int[] IA = new int[256];
    static {
        Arrays.fill(IA, -1);
        for (int i = 0, iS = CA.length; i < iS; i++) {
            IA[CA[i]] = i;
        }
        IA['='] = 0;
    }

    /**
     * Range-checked lookup into {@link #IA}.
     * @param c The code point (or unsigned byte value) to look up.
     * @return The 6-bit value of <code>c</code>, 0 for '=', or -1 if <code>c</code> is not a BASE64
     * character. Values outside 0-255 are reported as illegal (-1) instead of overrunning the table.
     */
    private static int lookup(int c)
    {
        return (c >= 0 && c < IA.length) ? IA[c] : -1;
    }

    /**
     * Computes the number of decoded bytes for a given number of legal BASE64 characters.
     * The multiplication is done in <code>long</code> so that very long inputs don't overflow.
     * @param legalCnt Number of legal characters (including '='). May be negative.
     * @param pad Number of pad characters to subtract.
     * @return <code>legalCnt * 6 / 8 - pad</code>. May be negative for malformed input.
     */
    private static int decodedLength(int legalCnt, int pad)
    {
        int raw = (int) (((long) legalCnt * 6) >> 3);
        return raw - pad;
    }

    // ****************************************************************************************
    // *  char[] version
    // ****************************************************************************************

    /** Encodes a raw byte array into a BASE64 <code>char[]</code> representation in accordance with RFC 2045.
     * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty array will be returned.
     * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
     * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
     * little faster.
     * @return A BASE64 encoded array. Never <code>null</code>.
     */
    public static final char[] encodeToChar(byte[] sArr, boolean lineSep)
    {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return new char[0];
        }

        int eLen = (sLen / 3) * 3;                              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;                   // Returned character count
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
        char[] dArr = new char[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying attention to sign.
            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = CA[(i >>> 18) & 0x3f];
            dArr[d++] = CA[(i >>> 12) & 0x3f];
            dArr[d++] = CA[(i >>> 6) & 0x3f];
            dArr[d++] = CA[i & 0x3f];

            // Add optional line separator after every 19 groups (76 chars), but never at the very end.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int: the remaining one or two bytes, left-aligned into 18 bits.
            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = CA[i >> 12];
            dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
            dArr[dLen - 1] = '=';
        }
        return dArr;
    }

    /** Decodes a BASE64 encoded char array. All illegal characters will be ignored and can handle both arrays with
     * and without line separators. Characters above code point 255 are treated as illegal characters.
     * @param sArr The source array. <code>null</code> or length 0 will return an empty array.
     * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the number of legal
     * characters (including '=') isn't divisible by 4, or if there are more pad characters than decoded
     * bytes. (I.e. definitely corrupted).
     */
    public static final byte[] decode(char[] sArr)
    {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return new byte[0];
        }

        // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
        // so we don't have to reallocate & copy it later.
        int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
        for (int i = 0; i < sLen; i++) {
            if (lookup(sArr[i]) < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count '=' at end. The scan walks backwards over anything whose table value is <= 0.
        int pad = 0;
        for (int i = sLen; i > 1 && lookup(sArr[--i]) <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = decodedLength(sLen - sepCnt, pad);
        if (len < 0) {
            return null; // More padding than data: corrupted input.
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
                int c = lookup(sArr[s++]);
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }
            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }
        return dArr;
    }

    /** Decodes a BASE64 encoded char array that is known to be reasonably well formatted. The preconditions are:<br>
     * + The array must have a line length of 76 chars OR no line separators at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045<br>
     * + The array must not contain illegal characters within the encoded string<br>
     * + The array CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
     * If the preconditions are violated the returned bytes are unspecified. Use {@link #decode(char[])} for
     * input of unknown quality.
     * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0, e.g. when the array holds illegal characters only.
     */
    public static final byte[] decodeFast(char[] sArr)
    {
        // Check special case
        int sLen = sArr.length;
        if (sLen == 0) {
            return new byte[0];
        }

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start
        while (sIx < eIx && lookup(sArr[sIx]) < 0) {
            sIx++;
        }

        // Trim illegal chars from end
        while (eIx > 0 && lookup(sArr[eIx]) < 0) {
            eIx--;
        }

        // get the padding count (=) (0, 1 or 2). Guard eIx - 1 so a lone '=' can't index below 0.
        int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including possible separators
        // The first separator, if any, sits 76 chars after the trimmed start (not after index 0).
        int sepCnt = (cCnt > 76 && sArr[sIx + 76] == '\r') ? (cCnt / 78) << 1 : 0;

        int len = decodedLength(cCnt - sepCnt, pad); // The number of decoded bytes
        if (len <= 0) {
            return new byte[0]; // Nothing but illegal characters and/or padding.
        }
        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = lookup(sArr[sIx++]) << 18 | lookup(sArr[sIx++]) << 12
                    | lookup(sArr[sIx++]) << 6 | lookup(sArr[sIx++]);

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++) {
                i |= lookup(sArr[sIx++]) << (18 - j * 6);
            }

            for (int r = 16; d < len; r -= 8) {
                dArr[d++] = (byte) (i >> r);
            }
        }

        return dArr;
    }

    // ****************************************************************************************
    // *  byte[] version
    // ****************************************************************************************

    /** Encodes a raw byte array into a BASE64 <code>byte[]</code> representation in accordance with RFC 2045.
     * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty array will be returned.
     * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
     * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
     * little faster.
     * @return A BASE64 encoded array. Never <code>null</code>.
     */
    public static final byte[] encodeToByte(byte[] sArr, boolean lineSep)
    {
        // Check special case
        int sLen = sArr != null ? sArr.length : 0;
        if (sLen == 0) {
            return new byte[0];
        }

        int eLen = (sLen / 3) * 3;                              // Length of even 24-bits.
        int cCnt = ((sLen - 1) / 3 + 1) << 2;                   // Returned character count
        int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
        byte[] dArr = new byte[dLen];

        // Encode even 24-bits
        for (int s = 0, d = 0, cc = 0; s < eLen;) {
            // Copy next three bytes into lower 24 bits of int, paying attention to sign.
            int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);

            // Encode the int into four chars
            dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
            dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[d++] = (byte) CA[i & 0x3f];

            // Add optional line separator after every 19 groups (76 chars), but never at the very end.
            if (lineSep && ++cc == 19 && d < dLen - 2) {
                dArr[d++] = '\r';
                dArr[d++] = '\n';
                cc = 0;
            }
        }

        // Pad and encode last bits if source isn't an even 24 bits.
        int left = sLen - eLen; // 0 - 2.
        if (left > 0) {
            // Prepare the int: the remaining one or two bytes, left-aligned into 18 bits.
            int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);

            // Set last four chars
            dArr[dLen - 4] = (byte) CA[i >> 12];
            dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
            dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
            dArr[dLen - 1] = '=';
        }
        return dArr;
    }

    /** Decodes a BASE64 encoded byte array. All illegal characters will be ignored and can handle both arrays with
     * and without line separators.
     * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the number of legal
     * characters (including '=') isn't divisible by 4, or if there are more pad characters than decoded
     * bytes. (I.e. definitely corrupted).
     */
    public static final byte[] decode(byte[] sArr)
    {
        // No special case needed for length 0: the general path below yields an empty array.
        int sLen = sArr.length;

        // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
        // so we don't have to reallocate & copy it later.
        int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
        for (int i = 0; i < sLen; i++) {
            if (lookup(sArr[i] & 0xff) < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count '=' at end. The scan walks backwards over anything whose table value is <= 0.
        int pad = 0;
        for (int i = sLen; i > 1 && lookup(sArr[--i] & 0xff) <= 0;) {
            if (sArr[i] == '=') {
                pad++;
            }
        }

        int len = decodedLength(sLen - sepCnt, pad);
        if (len < 0) {
            return null; // More padding than data: corrupted input.
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
                int c = lookup(sArr[s++] & 0xff);
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }

        return dArr;
    }

    /** Decodes a BASE64 encoded byte array that is known to be reasonably well formatted. The preconditions are:<br>
     * + The array must have a line length of 76 chars OR no line separators at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045<br>
     * + The array must not contain illegal characters within the encoded string<br>
     * + The array CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
     * If the preconditions are violated the returned bytes are unspecified. Use {@link #decode(byte[])} for
     * input of unknown quality.
     * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0, e.g. when the array holds illegal characters only.
     */
    public static final byte[] decodeFast(byte[] sArr)
    {
        // Check special case
        int sLen = sArr.length;
        if (sLen == 0) {
            return new byte[0];
        }

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start
        while (sIx < eIx && lookup(sArr[sIx] & 0xff) < 0) {
            sIx++;
        }

        // Trim illegal chars from end
        while (eIx > 0 && lookup(sArr[eIx] & 0xff) < 0) {
            eIx--;
        }

        // get the padding count (=) (0, 1 or 2). Guard eIx - 1 so a lone '=' can't index below 0.
        int pad = sArr[eIx] == '=' ? (eIx > 0 && sArr[eIx - 1] == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including possible separators
        // The first separator, if any, sits 76 bytes after the trimmed start (not after index 0).
        int sepCnt = (cCnt > 76 && sArr[sIx + 76] == '\r') ? (cCnt / 78) << 1 : 0;

        int len = decodedLength(cCnt - sepCnt, pad); // The number of decoded bytes
        if (len <= 0) {
            return new byte[0]; // Nothing but illegal characters and/or padding.
        }
        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            // Bytes are masked to 0-255 so that negative values can't index below the table.
            int i = lookup(sArr[sIx++] & 0xff) << 18 | lookup(sArr[sIx++] & 0xff) << 12
                    | lookup(sArr[sIx++] & 0xff) << 6 | lookup(sArr[sIx++] & 0xff);

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++) {
                i |= lookup(sArr[sIx++] & 0xff) << (18 - j * 6);
            }

            for (int r = 16; d < len; r -= 8) {
                dArr[d++] = (byte) (i >> r);
            }
        }

        return dArr;
    }

    // ****************************************************************************************
    // * String version
    // ****************************************************************************************

    /** Encodes a raw byte array into a BASE64 <code>String</code> representation in accordance with RFC 2045.
     * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty string will be returned.
     * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
     * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
     * little faster.
     * @return A BASE64 encoded string. Never <code>null</code>.
     */
    public static final String encodeToString(byte[] sArr, boolean lineSep)
    {
        // Reuse char[] since we can't create a String incrementally anyway and StringBuffer/Builder would be slower.
        return new String(encodeToChar(sArr, lineSep));
    }

    /** Decodes a BASE64 encoded <code>String</code>. All illegal characters will be ignored and can handle both strings with
     * and without line separators. Characters above code point 255 are treated as illegal characters.<br>
     * This version uses <code>str.charAt(i)</code> to iterate the string, so no temporary array is created.
     * @param str The source string. <code>null</code> or length 0 will return an empty array.
     * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the number of legal
     * characters (including '=') isn't divisible by 4, or if there are more pad characters than decoded
     * bytes. (I.e. definitely corrupted).
     */
    public static final byte[] decode(String str)
    {
        // Check special case
        int sLen = str != null ? str.length() : 0;
        if (sLen == 0) {
            return new byte[0];
        }

        // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
        // so we don't have to reallocate & copy it later.
        int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
        for (int i = 0; i < sLen; i++) {
            if (lookup(str.charAt(i)) < 0) {
                sepCnt++;
            }
        }

        // Check so that legal chars (including '=') are evenly divisible by 4 as specified in RFC 2045.
        if ((sLen - sepCnt) % 4 != 0) {
            return null;
        }

        // Count '=' at end. The scan walks backwards over anything whose table value is <= 0.
        int pad = 0;
        for (int i = sLen; i > 1 && lookup(str.charAt(--i)) <= 0;) {
            if (str.charAt(i) == '=') {
                pad++;
            }
        }

        int len = decodedLength(sLen - sepCnt, pad);
        if (len < 0) {
            return null; // More padding than data: corrupted input.
        }

        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        for (int s = 0, d = 0; d < len;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = 0;
            for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
                int c = lookup(str.charAt(s++));
                if (c >= 0) {
                    i |= c << (18 - j * 6);
                } else {
                    j--;
                }
            }
            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            if (d < len) {
                dArr[d++] = (byte) (i >> 8);
                if (d < len) {
                    dArr[d++] = (byte) i;
                }
            }
        }
        return dArr;
    }

    /** Decodes a BASE64 encoded string that is known to be reasonably well formatted. The preconditions are:<br>
     * + The string must have a line length of 76 chars OR no line separators at all (one line).<br>
     * + Line separator must be "\r\n", as specified in RFC 2045<br>
     * + The string must not contain illegal characters within the encoded string<br>
     * + The string CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
     * If the preconditions are violated the returned bytes are unspecified. Use {@link #decode(String)} for
     * input of unknown quality.
     * @param s The source string. Length 0 will return an empty array. <code>null</code> will throw an exception.
     * @return The decoded array of bytes. May be of length 0, e.g. when the string holds illegal characters only.
     */
    public static final byte[] decodeFast(String s)
    {
        // Check special case
        int sLen = s.length();
        if (sLen == 0) {
            return new byte[0];
        }

        int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.

        // Trim illegal chars from start. The full char value is checked (no masking), so characters
        // above 255 can't alias onto legal BASE64 characters.
        while (sIx < eIx && lookup(s.charAt(sIx)) < 0) {
            sIx++;
        }

        // Trim illegal chars from end
        while (eIx > 0 && lookup(s.charAt(eIx)) < 0) {
            eIx--;
        }

        // get the padding count (=) (0, 1 or 2). Guard eIx - 1 so a lone '=' can't index below 0.
        int pad = s.charAt(eIx) == '=' ? (eIx > 0 && s.charAt(eIx - 1) == '=' ? 2 : 1) : 0;
        int cCnt = eIx - sIx + 1; // Content count including possible separators
        // The first separator, if any, sits 76 chars after the trimmed start (not after index 0).
        int sepCnt = (cCnt > 76 && s.charAt(sIx + 76) == '\r') ? (cCnt / 78) << 1 : 0;

        int len = decodedLength(cCnt - sepCnt, pad); // The number of decoded bytes
        if (len <= 0) {
            return new byte[0]; // Nothing but illegal characters and/or padding.
        }
        byte[] dArr = new byte[len]; // Preallocate byte[] of exact length

        // Decode all but the last 0 - 2 bytes.
        int d = 0;
        for (int cc = 0, eLen = (len / 3) * 3; d < eLen;) {
            // Assemble three bytes into an int from four "valid" characters.
            int i = lookup(s.charAt(sIx++)) << 18 | lookup(s.charAt(sIx++)) << 12
                    | lookup(s.charAt(sIx++)) << 6 | lookup(s.charAt(sIx++));

            // Add the bytes
            dArr[d++] = (byte) (i >> 16);
            dArr[d++] = (byte) (i >> 8);
            dArr[d++] = (byte) i;

            // If line separator, jump over it.
            if (sepCnt > 0 && ++cc == 19) {
                sIx += 2;
                cc = 0;
            }
        }

        if (d < len) {
            // Decode last 1-3 bytes (incl '=') into 1-3 bytes
            int i = 0;
            for (int j = 0; sIx <= eIx - pad; j++) {
                i |= lookup(s.charAt(sIx++)) << (18 - j * 6);
            }

            for (int r = 16; d < len; r -= 8) {
                dArr[d++] = (byte) (i >> r);
            }
        }

        return dArr;
    }
}