1 package wjhk.jupload2.upload.helper;
2
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import wjhk.jupload2.exception.JUploadEOFException;
import wjhk.jupload2.exception.JUploadException;
import wjhk.jupload2.exception.JUploadIOException;
import wjhk.jupload2.policies.UploadPolicy;
12
13 /**
14 * A helper, to read the response coming from the server.
15 *
16 * @author etienne_sf
17 */
18 public class HTTPInputStreamReader {
19 // //////////////////////////////////////////////////////////////////////////////
20 // //////////////////// Main attributes
21 // //////////////////////////////////////////////////////////////////////////////
22
23 /**
24 * The current upload policy, always useful.
25 */
26 private UploadPolicy uploadPolicy = null;
27
28 private HTTPConnectionHelper httpConnectionHelper = null;
29
30 /**
31 * Contains the HTTP response bytearrayResponseBody, that is: the server
32 * response, without the headers.
33 */
34 String stringResponseBody = null;
35
36 private byte[] bytearrayResponseBody = new byte[0];
37
38 /**
39 * The headers of the HTTP response.
40 */
41 String responseHeaders = null;
42
43 /**
44 * The status message from the first line of the response (e.g. "200 OK").
45 */
46 String responseMsg = null;
47
48 // ////////////////////////////////////////////////////////////////////////////////////
49 // /////////////////// ATTRIBUTE CONTAINING DATA COMING FROM THE RESPONSE
50 // ////////////////////////////////////////////////////////////////////////////////////
51
52 private CookieJar cookies = null;
53
54 boolean gotClose = false;
55
56 private boolean gotChunked = false;
57
58 private boolean gotContentLength = false;
59
60 private int clen = 0;
61
62 /**
63 * The server HTTP response. Should be 200, in case of success.
64 */
65 private int httpStatusCode = 0;
66
67 private String line = "";
68
69 private String charset = "ISO-8859-1";
70
71 // ////////////////////////////////////////////////////////////////////////////////////
72 // /////////////////// CONSTANTS USED TO CONTROL THE HTTP INTPUT
73 // ////////////////////////////////////////////////////////////////////////////////////
74 private final static int CHUNKBUF_SIZE = 4096;
75
76 private final byte chunkbuf[] = new byte[CHUNKBUF_SIZE];
77
78 private final static Pattern pChunked = Pattern.compile(
79 "^Transfer-Encoding:\\s+chunked", Pattern.CASE_INSENSITIVE);
80
81 private final static Pattern pClose = Pattern.compile(
82 "^Connection:\\s+close", Pattern.CASE_INSENSITIVE);
83
84 private final static Pattern pProxyClose = Pattern.compile(
85 "^Proxy-Connection:\\s+close", Pattern.CASE_INSENSITIVE);
86
87 private final static Pattern pHttpStatus = Pattern
88 .compile("^HTTP/\\d\\.\\d\\s+((\\d+)\\s+.*)$");
89
90 private final static Pattern pContentLen = Pattern.compile(
91 "^Content-Length:\\s+(\\d+)$", Pattern.CASE_INSENSITIVE);
92
93 private final static Pattern pContentTypeCs = Pattern.compile(
94 "^Content-Type:\\s+.*;\\s*charset=([^;\\s]+).*$",
95 Pattern.CASE_INSENSITIVE);
96
97 private final static Pattern pSetCookie = Pattern.compile(
98 "^Set-Cookie:\\s+(.*)$", Pattern.CASE_INSENSITIVE);
99
100 /**
101 * The standard constructor: does nothing ! Oh yes, it initialize some
102 * attribute from the given parameter... :-)
103 *
104 * @param httpConnectionHelper The connection helper, associated with this
105 * instance.
106 * @param uploadPolicy The current upload policy.
107 */
108 public HTTPInputStreamReader(HTTPConnectionHelper httpConnectionHelper,
109 UploadPolicy uploadPolicy) {
110 this.httpConnectionHelper = httpConnectionHelper;
111 this.uploadPolicy = uploadPolicy;
112 this.cookies = new CookieJar(uploadPolicy);
113 }
114
115 /**
116 * Return the last read http response (200, in case of success).
117 *
118 * @return The last read http response
119 */
120 public int gethttpStatusCode() {
121 return this.httpStatusCode;
122 }
123
124 /**
125 * Get the last response bytearrayResponseBody.
126 *
127 * @return The last read response bytearrayResponseBody.
128 */
129 public String getResponseBody() {
130 return this.stringResponseBody;
131 }
132
133 /**
134 * Get the last response bytearrayResponseBody. This byte array should be
135 * decoded by using the {@link #getResponseCharset()}.
136 *
137 * @return The last read response bytearrayResponseBody.
138 *
139 */
140 public byte[] getResponseBodyAsByteArray() {
141 return this.bytearrayResponseBody;
142 }
143
144 /**
145 * Get the charset that should be used to decode the last response, when
146 * using the {@link #getResponseBodyAsByteArray()} method.
147 *
148 * @return The last read response bytearrayResponseBody.
149 */
150 public String getResponseCharset() {
151 return this.charset;
152 }
153
154 /**
155 * Get the headers of the HTTP response.
156 *
157 * @return The HTTP headers.
158 */
159 public String getResponseHeaders() {
160 return this.responseHeaders;
161 }
162
163 /**
164 * Get the last response message.
165 *
166 * @return The response message from the first line of the response (e.g.
167 * "200 OK").
168 */
169 public String getResponseMsg() {
170 return this.responseMsg;
171 }
172
173 /**
174 * The main method: reads the response in the input stream.
175 *
176 * @return The response status (e.g.: 200 if everything was ok)
177 * @throws JUploadException
178 */
179 public int readHttpResponse() throws JUploadException {
180 PushbackInputStream httpDataIn = this.httpConnectionHelper
181 .getInputStream();
182
183 try {
184 // We first read the headers,
185 readHeaders(httpDataIn);
186
187 // then the bytearrayResponseBody.
188 // If we're in a HEAD request ... we're not interested in the
189 // bytearrayResponseBody!
190 if (this.httpConnectionHelper.getMethod().equals("HEAD")) {
191 this.uploadPolicy
192 .displayDebug(
193 "This is a HEAD request: we don't care about the bytearrayResponseBody",
194 70);
195 this.stringResponseBody = "";
196 } else {
197 readBody(httpDataIn);
198 }
199 } catch (JUploadException e) {
200 throw e;
201 } catch (Exception e) {
202 throw new JUploadException(e);
203 }
204
205 return this.httpStatusCode;
206 }
207
208 // //////////////////////////////////////////////////////////////////////////////////////
209 // //////////////////// Various utilities
210 // //////////////////////////////////////////////////////////////////////////////////////
211
212 /**
213 * Concatenates two byte arrays.
214 *
215 * @param buf1 The first array
216 * @param buf2 The second array
217 * @return A byte array, containing buf2 appended to buf2
218 */
219 static byte[] byteAppend(byte[] buf1, byte[] buf2) {
220 byte[] ret = new byte[buf1.length + buf2.length];
221 System.arraycopy(buf1, 0, ret, 0, buf1.length);
222 System.arraycopy(buf2, 0, ret, buf1.length, buf2.length);
223 return ret;
224 }
225
226 /**
227 * Concatenates two byte arrays.
228 *
229 * @param buf1 The first array
230 * @param buf2 The second array
231 * @param len Number of bytes to copy from buf2
232 * @return A byte array, containing buf2 appended to buf2
233 */
234 static byte[] byteAppend(byte[] buf1, byte[] buf2, int len) {
235 if (len > buf2.length)
236 len = buf2.length;
237 byte[] ret = new byte[buf1.length + len];
238 System.arraycopy(buf1, 0, ret, 0, buf1.length);
239 System.arraycopy(buf2, 0, ret, buf1.length, len);
240 return ret;
241 }
242
243 /**
244 * Similar like BufferedInputStream#readLine() but operates on raw bytes.
245 * Line-Ending is <b>always</b> "\r\n".
246 *
247 * @param inputStream
248 *
249 * @param charset The input charset of the stream.
250 * @param includeCR Set to true, if the terminating CR/LF should be included
251 * in the returned byte array.
252 * @return The line, encoded from the input stream with the given charset
253 * @throws IOException
254 * @throws JUploadException
255 */
256 public static String readLine(PushbackInputStream inputStream,
257 String charset, boolean includeCR) throws IOException,
258 JUploadException {
259 byte[] line = readLine(inputStream, includeCR);
260 return (null == line) ? null : new String(line, charset);
261 }
262
263 /**
264 * Similar like BufferedInputStream#readLine() but operates on raw bytes.
265 * According to RFC 2616, and of line may be CR (13), LF (10) or CRLF.
266 * Line-Ending is <b>always</b> "\r\n" in header, but not in text bodies.
267 * Update done by TedA (sourceforge account: tedaaa). Allows to manage
268 * response from web server that send LF instead of CRLF ! Here is a part of
269 * the RFC: <I>"we recommend that applications, when parsing such headers,
270 * recognize a single LF as a line terminator and ignore the leading
271 * CR"</I>. <BR>
272 * Corrected again to manage line finished by CR only. This is not allowed
273 * in headers, but this method is also used to read lines in the
274 * bytearrayResponseBody.
275 *
276 * @param inputStream
277 *
278 * @param includeCR Set to true, if the terminating CR/LF should be included
279 * in the returned byte array. In this case, CR/LF is always
280 * returned to the caller, whether the input stream got CR, LF or
281 * CRLF.
282 * @return The byte array from the input stream, with or without a trailing
283 * CRLF
284 * @throws IOException
285 * @throws JUploadException
286 */
287 public static byte[] readLine(PushbackInputStream inputStream,
288 boolean includeCR) throws IOException, JUploadException {
289 final byte EOS = -1;
290 final byte CR = 13;
291 final byte LF = 10;
292 int len = 0;
293 int buflen = 128; // average line length
294 byte[] buf = new byte[buflen];
295 byte[] ret = null;
296 int b;
297 boolean lineRead = false;
298
299 while (!lineRead) {
300 try {
301 b = inputStream.read();
302 } catch (IOException ioe) {
303 throw new JUploadIOException(ioe.getClass().getName() + ": "
304 + ioe.getMessage()
305 + " (while reading server response )", ioe);
306 } catch (Exception e) {
307 throw new JUploadException(e.getClass().getName() + ": "
308 + e.getMessage() + " (while reading server response )",
309 e);
310 }
311 switch (b) {
312 case EOS:
313 // We've finished reading the stream, and so the line is
314 // finished too.
315 if (len == 0) {
316 return null;
317 }
318 lineRead = true;
319 break;
320 /*
321 * if (len > 0) { ret = new byte[len]; System.arraycopy(buf, 0,
322 * ret, 0, len); return ret; } return null;
323 */
324 case LF:
325 // We found the end of the current line.
326 lineRead = true;
327 break;
328 case CR:
329 // We got a CR. It can be the end of line.
330 // Is it followed by a LF ? (not mandatory in RFC 2616)
331 b = inputStream.read();
332
333 if (b != LF) {
334 // The end of line was a simple LF: the next one blongs
335 // to the next line.
336 inputStream.unread(b);
337 }
338 lineRead = true;
339 break;
340 default:
341 buf[len++] = (byte) b;
342 // If the buffer is too small, we let enough space to add CR
343 // and LF, in case of ...
344 if (len + 2 >= buflen) {
345 buflen *= 2;
346 byte[] tmp = new byte[buflen];
347 System.arraycopy(buf, 0, tmp, 0, len);
348 buf = tmp;
349 }
350 }
351 } // while
352
353 // Let's go back to before any CR and LF.
354 while (len > 0 && (buf[len] == CR || buf[len] == LF)) {
355 len -= 1;
356 }
357
358 // Ok, now len indicates the end of the actual line.
359 // Should we add a proper CRLF, or nothing ?
360 if (includeCR) {
361 // We have enough space to add these two characters (see the default
362 // here above)
363 buf[len++] = CR;
364 buf[len++] = LF;
365 }
366
367 if (len > 0) {
368 ret = new byte[len];
369 if (len > 0)
370 System.arraycopy(buf, 0, ret, 0, len);
371 } else {
372 // line feed for empty line between headers and
373 // bytearrayResponseBody, or within the
374 // bytearrayResponseBody.
375 ret = new byte[0];
376 }
377 return ret;
378 }
379
380 /**
381 * Read the headers from the given input stream.
382 *
383 * @param httpDataIn The http input stream
384 * @throws IOException
385 * @throws JUploadException
386 */
387 private void readHeaders(PushbackInputStream httpDataIn)
388 throws IOException, JUploadException {
389 StringBuffer sbHeaders = new StringBuffer();
390 // Headers are US-ASCII (See RFC 2616, Section 2.2)
391 String tmp;
392 // We must be reading the first line of the HTTP header.
393 this.uploadPolicy.displayDebug(
394 "-------- Response Headers Start --------", 80);
395
396 do {
397 tmp = readLine(httpDataIn, "US-ASCII", false);
398 if (null == tmp) {
399 throw new JUploadEOFException(this.uploadPolicy,
400 "reading headers");
401 }
402 if (this.httpStatusCode == 0) {
403 Matcher m = pHttpStatus.matcher(tmp);
404 if (m.matches()) {
405 this.httpStatusCode = Integer.parseInt(m.group(2));
406 this.responseMsg = m.group(1);
407 } else {
408 // The status line must be the first line of the
409 // response. (See RFC 2616, Section 6.1) so this
410 // is an error.
411
412 // We first display the wrong line.
413 this.uploadPolicy.displayDebug("First line of response: '"
414 + tmp + "'", 80);
415 // Then, we throw the exception.
416 throw new JUploadException(
417 "HTTP response did not begin with status line.");
418 }
419 }
420 // Handle folded headers (RFC 2616, Section 2.2). This is
421 // handled after the status line, because that line may
422 // not be folded (RFC 2616, Section 6.1).
423 if (tmp.startsWith(" ") || tmp.startsWith("\t"))
424 this.line += " " + tmp.trim();
425 else
426 this.line = tmp;
427
428 // The read line is now correctly formatted.
429 this.uploadPolicy.displayDebug(this.line, 80);
430 sbHeaders.append(tmp).append("\n");
431
432 if (pClose.matcher(this.line).matches())
433 this.gotClose = true;
434 if (pProxyClose.matcher(this.line).matches())
435 this.gotClose = true;
436 if (pChunked.matcher(this.line).matches())
437 this.gotChunked = true;
438 Matcher m = pContentLen.matcher(this.line);
439 if (m.matches()) {
440 this.gotContentLength = true;
441 this.clen = Integer.parseInt(m.group(1));
442 }
443 m = pContentTypeCs.matcher(this.line);
444 if (m.matches())
445 this.charset = m.group(1);
446 m = pSetCookie.matcher(this.line);
447 if (m.matches()) {
448 this.uploadPolicy.displayDebug(
449 "Calling this.cookies.parseCookieHeader, with parameter: "
450 + m.group(1), 80);
451 this.cookies.parseCookieHeader(m.group(1));
452 this.uploadPolicy.displayDebug("Cookie header parsed.", 80);
453 }
454 // RFC 2616, Section 6. Body is separated by the header with an
455 // empty line: so end of headers is an empty line.
456 } while (this.line.length() > 0);
457
458 this.responseHeaders = sbHeaders.toString();
459 this.uploadPolicy.displayDebug(
460 "--------- Response Headers End ---------", 80);
461 }// readHeaders()
462
463 /**
464 * Read the bytearrayResponseBody from the given input stream.
465 *
466 * @param httpDataIn The http input stream
467 * @throws IOException
468 * @throws JUploadException
469 * @throws JUploadException
470 */
471 private void readBody(PushbackInputStream httpDataIn) throws IOException,
472 JUploadException {
473 // && is evaluated from left to right so !stop must come first!
474 while ((!this.gotContentLength) || (this.clen > 0)) {
475 if (this.gotChunked) {
476 // Read the chunk header.
477 // This is US-ASCII! (See RFC 2616, Section 2.2)
478 this.line = readLine(httpDataIn, "US-ASCII", false);
479 if (null == this.line)
480 throw new JUploadEOFException(this.uploadPolicy,
481 "reading HTTP Body, HTTP chunked mode (1)");
482 // Handle a single chunk of the response
483 // We cut off possible chunk extensions and ignore them.
484 // The length is hex-encoded (RFC 2616, Section 3.6.1)
485 int len = Integer.parseInt(this.line.replaceFirst(";.*", "")
486 .trim(), 16);
487 this.uploadPolicy.displayDebug("Chunk: " + this.line + " dec: "
488 + len, 70);
489 if (len == 0) {
490 // RFC 2616, Section 3.6.1: A length of 0 denotes
491 // the last chunk of the bytearrayResponseBody.
492
493 // This code wrong if the server sends chunks with trailers!
494 // (trailers are HTTP Headers that are send *after* the
495 // bytearrayResponseBody. These are announced
496 // in the regular HTTP header "Trailer".
497 // Fritz: Never seen them so far ...
498 // TODO: Implement trailer-handling.
499 break;
500 }
501
502 // Loop over the chunk (len == length of the chunk)
503 while (len > 0) {
504 int rlen = (len > CHUNKBUF_SIZE) ? CHUNKBUF_SIZE : len;
505 int ofs = 0;
506 if (rlen > 0) {
507 while (ofs < rlen) {
508 int res = httpDataIn.read(this.chunkbuf, ofs, rlen
509 - ofs);
510 if (res < 0)
511 throw new JUploadEOFException(
512 this.uploadPolicy,
513 "reading body, HTTP chunk mode (2)");
514 len -= res;
515 ofs += res;
516 }
517 if (ofs < rlen)
518 throw new JUploadException("short read");
519 if (rlen < CHUNKBUF_SIZE)
520 this.bytearrayResponseBody = byteAppend(
521 this.bytearrayResponseBody, this.chunkbuf,
522 rlen);
523 else
524 this.bytearrayResponseBody = byteAppend(
525 this.bytearrayResponseBody, this.chunkbuf);
526 }
527 }
528 // Got the whole chunk, read the trailing CRLF.
529 readLine(httpDataIn, false);
530 } else {
531 // Not chunked. Use either content-length (if available)
532 // or read until EOF.
533 if (this.gotContentLength) {
534 // Got a Content-Length. Read exactly that amount of
535 // bytes.
536 while (this.clen > 0) {
537 int rlen = (this.clen > CHUNKBUF_SIZE) ? CHUNKBUF_SIZE
538 : this.clen;
539 int ofs = 0;
540 if (rlen > 0) {
541 while (ofs < rlen) {
542 int res = httpDataIn.read(this.chunkbuf, ofs,
543 rlen - ofs);
544 if (res < 0)
545 throw new JUploadEOFException(
546 this.uploadPolicy,
547 "reading HTTP bytearrayResponseBody, not chunked mode");
548 this.clen -= res;
549 ofs += res;
550 }
551 if (ofs < rlen)
552 throw new JUploadException("short read");
553 if (rlen < CHUNKBUF_SIZE)
554 this.bytearrayResponseBody = byteAppend(
555 this.bytearrayResponseBody,
556 this.chunkbuf, rlen);
557 else
558 this.bytearrayResponseBody = byteAppend(
559 this.bytearrayResponseBody,
560 this.chunkbuf);
561 }
562 }
563 } else {
564 // No Content-length available, read until EOF
565 //
566 while (true) {
567 byte[] lbuf = readLine(httpDataIn, true);
568 if (null == lbuf)
569 break;
570 this.bytearrayResponseBody = byteAppend(
571 this.bytearrayResponseBody, lbuf);
572 }
573 break;
574 }
575 }
576 } // while
577
578 // Convert the whole bytearrayResponseBody according to the charset.
579 // The default for charset ISO-8859-1, but overridden by
580 // the charset attribute of the Content-Type header (if any).
581 // See RFC 2616, Sections 3.4.1 and 3.7.1.
582 this.stringResponseBody = new String(this.bytearrayResponseBody,
583 this.charset);
584
585 // At the higher debug level, we display the response.
586 this.uploadPolicy.displayDebug("-------- Response Body Start --------",
587 99);
588 this.uploadPolicy.displayDebug(this.stringResponseBody, 99);
589 this.uploadPolicy.displayDebug("-------- Response Body End --------",
590 99);
591 }// readBody
592 }